Source code for megadetector.detection.run_inference_with_yolov5_val

"""

run_inference_with_yolov5_val.py

Runs a folder of images through MegaDetector (or another YOLOv5/YOLOv8 model) with YOLO's
val.py, converting the output to the standard MD format.  The reasons this script exists,
as an alternative to the standard run_detector_batch.py are:

* This script provides access to YOLO's test-time augmentation tools.
* This script serves as a reference implementation: by any reasonable definition, YOLOv5's
  val.py produces the "correct" result for any image, since it matches what was used in
  training.
* This script works for any Ultralytics detection model, including YOLOv8 models

YOLOv5's val.py uses each file's base name as a unique identifier, which doesn't work
when you have typical camera trap images like:

* a/b/c/RECONYX0001.JPG
* d/e/f/RECONYX0001.JPG

...both of which would just be "RECONYX0001.JPG".  So this script jumps through a bunch of
hoops to put symlinks in a flat folder, run YOLOv5 on that folder, and map the results back
to the real files.

If you are running a YOLOv5 model, this script currently requires the caller to supply the path
where a working YOLOv5 install lives, and assumes that the current conda environment is all set up for
YOLOv5.  If you are running a YOLOv8 model, the folder doesn't matter, but it assumes that ultralytics
tools are available in the current environment.

By default, this script uses symlinks to format the input images in a way that YOLO's
val.py likes, as per above.  This requires admin privileges on Windows... actually technically this
only requires permissions to create symbolic links, but I've never seen a case where someone has
that permission and *doesn't* have admin privileges.  If you are running this script on
Windows and you don't have admin privileges, use --no_use_symlinks, which will make copies of images,
rather than using symlinks.

"""

#%% Imports

import os
import sys
import uuid
import glob
import tempfile
import shutil
import json
import copy
import argparse

from tqdm import tqdm

from megadetector.utils import path_utils
from megadetector.utils import process_utils
from megadetector.utils import string_utils
from megadetector.utils.ct_utils import args_to_object

from megadetector.utils.ct_utils import is_iterable, split_list_into_fixed_size_chunks
from megadetector.utils import ct_utils
from megadetector.utils.path_utils import path_is_abs
from megadetector.data_management import yolo_output_to_md_output
from megadetector.detection.run_detector import try_download_known_detector
from megadetector.postprocessing.combine_batch_outputs import combine_batch_output_files

default_image_size_with_augmentation = int(1280 * 1.3)
default_image_size_with_no_augmentation = 1280


#%% Options class

class YoloInferenceOptions:
    """
    Settings consumed by run_inference_with_yolo_val(): input/output locations, model
    selection, inference parameters, and handling of temporary folders.
    """

    def __init__(self):

        ## Required-ish ##

        #: Folder containing the images to process; may be None when
        #: [image_filename_list] holds absolute paths
        self.input_folder = None

        #: When this is None, [input_folder] is mandatory, and every image in
        #: [input_folder] gets processed.
        #:
        #: When both this and [input_folder] are supplied, this is either a list of
        #: image paths relative to [input_folder], or the name of a .txt or .json file
        #: containing such a list.
        #:
        #: When this is supplied and [input_folder] is None, this is either a list of
        #: absolute image paths, or the name of a .txt or .json file containing such a list.
        self.image_filename_list = None

        #: Either a model file (ending in .pt) or a well-known model name (e.g. "MDV5A")
        self.model_filename = None

        #: Where to write results (a .json file in MD results format)
        self.output_file = None


        ## Optional ##

        #: Needed for older YOLOv5 inference; not needed for newer ultralytics/YOLOv8
        #: inference
        self.yolo_working_folder = None

        #: Supported values are 'yolov5' and 'ultralytics'; these really stand in for
        #: "the yolov5 repo" and "the ultralytics repo".
        self.model_type = 'yolov5'

        #: Image size, as a single int; in ultralytics terminology this means
        #: "scale the long side of the image to this size, and preserve aspect ratio".
        #:
        #: None means "pick a default based on whether augmentation is enabled".
        self.image_size = None

        #: Detections with confidence below this value are left out of the output file
        self.conf_thres = '0.001'

        #: Batch size; does not affect results, but large values may cause memory issues
        self.batch_size = 1

        #: Device specifier: usually '0' for GPU 0, '1' for GPU 1, etc., or 'cpu'
        self.device_string = '0'

        #: Whether to enable test-time augmentation
        self.augment = False

        #: Whether to enable half-precision inference
        self.half_precision_enabled = None

        #: Location for the temporary symlinks (or copies) that give each image file
        #: a unique identifier.
        #:
        #: None means "create a folder in system temp space".
        self.symlink_folder = None

        #: Whether unique identifiers are provided via symlinks (True) or copies (False)
        self.use_symlinks = True

        #: Strategy for making sure YOLO IDs (base filenames) are unique:
        #:
        #: * 'verify': assume image IDs are unique, but verify and error if they're not
        #: * 'links': create symlinks (or copies, depending on use_symlinks) to enforce uniqueness
        #: * 'auto': check whether IDs are unique, create links if necessary
        self.unique_id_strategy = 'links'

        #: Scratch folder for intermediate YOLO results.
        #:
        #: None means "create a folder in system temp space".
        self.yolo_results_folder = None

        #: Whether to delete the symlink folder once processing finishes
        self.remove_symlink_folder = True

        #: Whether to delete the intermediate results folder once processing finishes
        self.remove_yolo_results_folder = True

        #: Intentionally offset from the standard MD categories, because YOLOv5
        #: requires category IDs to start at 0.
        #:
        #: May also be a string naming any class mapping file supported by
        #: read_classes_from_yolo_dataset_file(): a YOLO dataset.yaml file, a text
        #: file with a list of classes, or a .json file with an ID --> name dict
        self.yolo_category_id_to_name = {0: 'animal', 1: 'person', 2: 'vehicle'}

        #: Behavior when the output file already exists.
        #:
        #: One of 'error', 'skip', or 'overwrite'.
        self.overwrite_handling = 'skip'

        #: When True, perform a dry run that shows the YOLO val command without
        #: executing it.
        self.preview_yolo_command_only = False

        #: Normally any error while copying images or creating symlinks is fatal.
        #: When True, those errors are demoted to warnings and processing continues.
        self.treat_copy_failures_as_warnings = False

        #: Whether to save YOLO console output
        self.save_yolo_debug_output = False

        #: Whether images are enumerated recursively within [input_folder]
        #:
        #: Has no effect when a list of files is provided.
        self.recursive = True

        #: Upper bound on the number of images processed in a single chunk
        self.checkpoint_frequency = None

        #: Normally, when creating symlinks to images, a unique job ID is appended to
        #: the symlink folder.  Callers who are 100% sure the symlink folder can be
        #: re-used across calls can set this to False.
        self.append_job_id_to_symlink_folder = True

        #: Normally, category ID 0 in the YOLO .json file becomes category 1 in the
        #: MD-formatted .json file.
        self.offset_yolo_category_ids = True
# ...def __init__()

# ...YoloInferenceOptions()


#%% Support functions

def _clean_up_temporary_folders(options,
                                symlink_folder,yolo_results_folder,
                                symlink_folder_is_temp_folder,yolo_folder_is_temp_folder):
    """
    Delete the symlink folder and/or the YOLO results folder, according to the
    remove_symlink_folder and remove_yolo_results_folder flags on [options].  When a
    folder is kept and is flagged as a temporary folder, print a warning instead.
    """

    # (option controlling removal, folder, is-temp flag, warning printed when kept)
    cleanup_plan = (
        ('remove_symlink_folder',
         symlink_folder,
         symlink_folder_is_temp_folder,
         'Warning: using temporary symlink folder {}, but not removing it'),
        ('remove_yolo_results_folder',
         yolo_results_folder,
         yolo_folder_is_temp_folder,
         'Warning: using temporary YOLO results folder {}, but not removing it'),
    )

    for removal_option_name,folder,folder_is_temporary,warning_template in cleanup_plan:

        # Look up the flag lazily, so the first folder is fully handled before the
        # second flag is read
        if getattr(options,removal_option_name):
            shutil.rmtree(folder)
            continue

        if folder_is_temporary:
            print(warning_template.format(folder))
def get_stats_for_category(filename,category='all'):
    """
    Retrieve statistics for a category from the YOLO console output stored in [filename].

    A results line is expected to look like:

    <category name> <n_images> <n_labels> <P> <R> <mAP50> <mAP50-95>

    The category name may contain spaces (e.g. "traffic light"); the last six
    whitespace-separated tokens on the line are treated as the numeric fields.

    Args:
        filename (str): a text file containing console output from a YOLO val run
        category (str, optional): a category name

    Returns:
        dict: a dict with fields category, n_images, n_labels, P, R, mAP50, and mAP50-95,
        or None if no results line for [category] was found

    Raises:
        AssertionError: if a results line for [category] appears before the header
            naming the mAP50 and mAP50-95 columns
        ValueError: if the numeric fields on the matching line can't be parsed
    """

    # Number of numeric columns that follow the category name
    n_numeric_fields = 6

    # This is just a hedge to make sure there isn't some YOLO version floating
    # around that used different IoU thresholds in the console output.
    found_map50 = False
    found_map5095 = False

    with open(filename,'r',encoding='utf-8') as f:

        for line in f:

            s = line.strip()
            s_lower = s.lower()

            if ' map50 ' in s_lower or ' map@.5 ' in s_lower:
                found_map50 = True
            if 'map50-95' in s_lower or 'map@.5:.95' in s_lower:
                found_map5095 = True

            # Cheap pre-filter; the exact name comparison happens below
            if not s.startswith(category):
                continue

            tokens = s.split()
            if len(tokens) <= n_numeric_fields:
                continue

            # Everything before the numeric columns is the category name.  A line that
            # merely starts with [category] (e.g. "cattle" when we're looking for "cat")
            # belongs to some other category, so skip it rather than failing.
            category_name_this_line = ' '.join(tokens[:-n_numeric_fields])
            if category_name_this_line != category:
                continue

            assert found_map50 and found_map5095, \
                'Parsing error in YOLO console output file {}'.format(filename)

            numeric_tokens = tokens[-n_numeric_fields:]

            to_return = {}
            to_return['category'] = category
            to_return['n_images'] = int(numeric_tokens[0])
            to_return['n_labels'] = int(numeric_tokens[1])
            to_return['P'] = float(numeric_tokens[2])
            to_return['R'] = float(numeric_tokens[3])
            to_return['mAP50'] = float(numeric_tokens[4])
            to_return['mAP50-95'] = float(numeric_tokens[5])
            return to_return

        # ...for each line

    # ...with open()

    return None
#%% Main function
def run_inference_with_yolo_val(options):
    """
    Runs a folder of images through MegaDetector (or another YOLOv5/YOLOv8 model) with YOLO's
    val.py, converting the output to the standard MD format.

    The overall flow is:

    * Validate options, resolve the model file, and set up scratch folders
    * Enumerate images (from a folder, a list, or a list file)
    * If checkpointing is enabled, split the images into chunks, call this function
      recursively for each chunk, and merge the per-chunk results
    * Otherwise, give each image a unique ID (via symlinks/copies if necessary), write a
      YOLO dataset file, run the YOLO val command, and convert the YOLO .json output
      to an MD-formatted .json file at [options.output_file]

    Note that [options] is modified in place: a model type of "yolov8" is replaced with
    "ultralytics", backslashes in [input_folder] are converted to forward slashes, and
    [yolo_category_id_to_name] is replaced with a dict if it was supplied as a filename.

    Args:
        options (YoloInferenceOptions): all the parameters used to control this process,
            including filenames; see YoloInferenceOptions for details

    Raises:
        ValueError: if the output file exists and [overwrite_handling] is 'error' (or is
            not a recognized value), if [model_type] is not recognized, or if
            [unique_id_strategy] is 'verify' and image IDs are not unique
        AssertionError: if inputs fail validation, or if the YOLO command fails
    """

    ##%% Input and path handling

    default_options = YoloInferenceOptions()

    for k in options.__dict__.keys():
        if k not in default_options.__dict__:
            # Print warnings about unexpected variables, except for things like
            # "no_append_job_id_to_symlink_folder", which just negate existing objects
            if not k.startswith('no_'):
                print('Warning: unexpected variable {} in options object'.format(k))

    if options.model_type == 'yolov8':

        print('Warning: model type "yolov8" supplied, "ultralytics" is the preferred model ' + \
              'type string for YOLOv8 models')
        options.model_type = 'ultralytics'

    if (options.model_type == 'yolov5') and ('yolov8' in options.model_filename.lower()):
        print('\n\n*** Warning: model type set as "yolov5", but your model filename contains "yolov8"... ' + \
              'did you mean to use --model_type yolov8?" ***\n\n')

    if options.yolo_working_folder is None:
        assert options.model_type == 'ultralytics', \
            'A working folder is required to run YOLOv5 val.py'
    else:
        assert os.path.isdir(options.yolo_working_folder), \
            'Could not find working folder {}'.format(options.yolo_working_folder)

    if options.half_precision_enabled is not None:
        assert options.half_precision_enabled in (0,1), \
            'Invalid value {} for --half_precision_enabled (should be 0 or 1)'.format(
                options.half_precision_enabled)

    # If the model filename is a known model string (e.g. "MDv5A"), download the model
    # if necessary
    model_filename = try_download_known_detector(options.model_filename)

    assert os.path.isfile(model_filename), \
        'Could not find model file {}'.format(model_filename)

    assert (options.input_folder is not None) or (options.image_filename_list is not None), \
        'You must specify a folder and/or a file list'

    if options.input_folder is not None:
        assert os.path.isdir(options.input_folder), 'Could not find input folder {}'.format(
            options.input_folder)

    if os.path.exists(options.output_file):
        if options.overwrite_handling == 'skip':
            print('Warning: output file {} exists, skipping'.format(options.output_file))
            return
        elif options.overwrite_handling == 'overwrite':
            print('Warning: output file {} exists, overwriting'.format(options.output_file))
        elif options.overwrite_handling == 'error':
            raise ValueError('Output file {} exists'.format(options.output_file))
        else:
            raise ValueError('Unknown output handling method {}'.format(options.overwrite_handling))

    output_dir = os.path.dirname(options.output_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir, exist_ok=True)

    if options.input_folder is not None:
        options.input_folder = options.input_folder.replace('\\','/')


    ##%% Other input handling

    if isinstance(options.yolo_category_id_to_name,str):

        assert os.path.isfile(options.yolo_category_id_to_name)
        yolo_dataset_file = options.yolo_category_id_to_name
        options.yolo_category_id_to_name = \
            yolo_output_to_md_output.read_classes_from_yolo_dataset_file(yolo_dataset_file)
        print('Loaded {} category mappings from {}'.format(
            len(options.yolo_category_id_to_name),yolo_dataset_file))

    temporary_folder = None
    symlink_folder_is_temp_folder = False
    yolo_folder_is_temp_folder = False

    job_id = str(uuid.uuid1())

    def get_job_temporary_folder(tf):
        """
        Return [tf] if it's not None, otherwise create (and return) a job-specific
        folder in system temp space.
        """
        if tf is not None:
            return tf
        tempdir_base = tempfile.gettempdir()
        tf = os.path.join(tempdir_base,'md_to_yolo','md_to_yolo_' + job_id)
        os.makedirs(tf,exist_ok=True)
        return tf

    symlink_folder = options.symlink_folder
    yolo_results_folder = options.yolo_results_folder

    if symlink_folder is None:
        temporary_folder = get_job_temporary_folder(temporary_folder)
        symlink_folder = os.path.join(temporary_folder,'symlinks')
        symlink_folder_is_temp_folder = True

    if yolo_results_folder is None:
        temporary_folder = get_job_temporary_folder(temporary_folder)
        yolo_results_folder = os.path.join(temporary_folder,'yolo_results')
        yolo_folder_is_temp_folder = True

    if options.append_job_id_to_symlink_folder:
        # Attach a GUID to the symlink folder, regardless of whether we created it
        symlink_folder_inner = os.path.join(symlink_folder,job_id)
    else:
        print('Re-using existing symlink folder {}'.format(symlink_folder))
        symlink_folder_inner = symlink_folder

    os.makedirs(symlink_folder_inner,exist_ok=True)
    os.makedirs(yolo_results_folder,exist_ok=True)


    ##%% Enumerate images

    image_files_relative = None
    image_files_absolute = None

    # If the caller just provided a folder, not a list of files...
    if options.image_filename_list is None:

        assert options.input_folder is not None and os.path.isdir(options.input_folder), \
            'Could not find input folder {}'.format(options.input_folder)
        image_files_relative = path_utils.find_images(options.input_folder,
                                                      recursive=options.recursive,
                                                      return_relative_paths=True,
                                                      convert_slashes=True)
        image_files_absolute = [os.path.join(options.input_folder,fn) for \
                                fn in image_files_relative]

    else:

        # If the caller provided a list of image files (rather than a filename pointing
        # to a list of image files)...
        if is_iterable(options.image_filename_list) and not isinstance(options.image_filename_list,str):

            image_files_relative = options.image_filename_list

        # If the caller provided a filename pointing to a list of image files...
        else:

            assert isinstance(options.image_filename_list,str), \
                'Unrecognized image filename list object type: {}'.format(options.image_filename_list)
            assert os.path.isfile(options.image_filename_list), \
                'Could not find image filename list file: {}'.format(options.image_filename_list)
            ext = os.path.splitext(options.image_filename_list)[-1].lower()
            assert ext in ('.json','.txt'), \
                'Unrecognized image filename list file extension: {}'.format(options.image_filename_list)
            if ext == '.json':
                with open(options.image_filename_list,'r') as f:
                    image_files_relative = json.load(f)
                    assert is_iterable(image_files_relative)
            else:
                assert ext == '.txt'
                with open(options.image_filename_list,'r') as f:
                    image_files_relative = [s.strip() for s in f]
                # Blank lines (e.g. a trailing newline at the end of the file) are not
                # filenames; skip them rather than failing the "file exists" check below.
                image_files_relative = [s for s in image_files_relative if len(s) > 0]

        # ...whether the image filename list was supplied as list vs. a filename

        if options.input_folder is None:

            image_files_absolute = image_files_relative

        else:

            # The list should be relative filenames
            for fn in image_files_relative:
                assert not path_is_abs(fn), \
                    'When providing a folder and a list, paths in the list should be relative'

            image_files_absolute = \
                [os.path.join(options.input_folder,fn) for fn in image_files_relative]

        for fn in image_files_absolute:
            assert os.path.isfile(fn), 'Could not find image file {}'.format(fn)

    # ...whether the caller supplied a list of filenames

    image_files_absolute = [fn.replace('\\','/') for fn in image_files_absolute]

    del image_files_relative


    ##%% Recurse if necessary to handle checkpoints

    if options.checkpoint_frequency is not None and options.checkpoint_frequency > 0:

        chunks = split_list_into_fixed_size_chunks(image_files_absolute,options.checkpoint_frequency)

        chunk_output_files = []

        # i_chunk = 0; chunk_files_abs = chunks[i_chunk]
        for i_chunk,chunk_files_abs in enumerate(chunks):

            print('Processing {} images from chunk {} of {}'.format(
                len(chunk_files_abs),i_chunk,len(chunks)))

            chunk_options = copy.deepcopy(options)

            # Run each chunk without checkpointing
            chunk_options.checkpoint_frequency = None

            if options.input_folder is not None:
                chunk_files_relative = \
                    [os.path.relpath(fn,options.input_folder) for fn in chunk_files_abs]
                chunk_options.image_filename_list = chunk_files_relative
            else:
                chunk_options.image_filename_list = chunk_files_abs

            chunk_options.image_filename_list = \
                [fn.replace('\\','/') for fn in chunk_options.image_filename_list]

            chunk_string = 'chunk_{}'.format(str(i_chunk).zfill(5))
            chunk_options.yolo_results_folder = yolo_results_folder + '_' + chunk_string
            chunk_options.symlink_folder = symlink_folder + '_' + chunk_string

            # Put the output file in the parent job's scratch folder
            chunk_output_file = os.path.join(yolo_results_folder,chunk_string + '_results_md_format.json')
            chunk_output_files.append(chunk_output_file)
            chunk_options.output_file = chunk_output_file

            if os.path.isfile(chunk_output_file):

                print('Chunk output file {} exists, checking completeness'.format(chunk_output_file))

                with open(chunk_output_file,'r') as f:
                    chunk_results = json.load(f)
                images_in_this_chunk_results_file = [im['file'] for im in chunk_results['images']]
                assert len(images_in_this_chunk_results_file) == len(chunk_options.image_filename_list), \
                    f'Expected {len(chunk_options.image_filename_list)} images in ' + \
                    f'chunk results file {chunk_output_file}, found {len(images_in_this_chunk_results_file)}, ' + \
                    'possibly this is left over from a previous job?'

                # Use a set for the membership test, so this check is linear in the chunk size
                expected_images_this_chunk = set(chunk_options.image_filename_list)
                for fn in images_in_this_chunk_results_file:
                    assert fn in expected_images_this_chunk, \
                        f'Unexpected image {fn} in chunk results file {chunk_output_file}, ' + \
                        'possibly this is left over from a previous job?'

                print('Chunk output file {} exists and is complete, skipping this chunk'.format(
                    chunk_output_file))

            # ...if the output file exists

            else:

                run_inference_with_yolo_val(chunk_options)

            # ...if we do/don't have to run this chunk

            assert os.path.isfile(chunk_options.output_file)

        # ...for each chunk

        # Merge
        _ = combine_batch_output_files(input_files=chunk_output_files,
                                       output_file=options.output_file,
                                       require_uniqueness=True,
                                       verbose=True)

        # Validate
        with open(options.output_file,'r') as f:
            combined_results = json.load(f)
            assert len(combined_results['images']) == len(image_files_absolute), \
                'Expected {} images in merged output file, found {}'.format(
                    len(image_files_absolute),len(combined_results['images']))

        # Clean up
        _clean_up_temporary_folders(options,
                                    symlink_folder,yolo_results_folder,
                                    symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)

        return

    # ...if we need to make recursive calls for file chunks


    ##%% Create symlinks (or copy images) to give a unique ID to each image

    # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
    # to the *original full path* for each image (not the symlink path).
    image_id_to_file = {}

    # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
    # to errors, including errors that happen before we run the model at all (e.g. file access errors).
    image_id_to_error = {}

    create_links = True

    if options.unique_id_strategy == 'links':

        create_links = True

    else:

        assert options.unique_id_strategy in ('auto','verify'), \
            'Unknown unique ID strategy {}'.format(options.unique_id_strategy)

        image_ids_are_unique = True

        for image_fn in tqdm(image_files_absolute,total=len(image_files_absolute)):

            image_id = os.path.splitext(os.path.basename(image_fn))[0]

            # Is this image ID unique?
            if image_id in image_id_to_file:
                if options.unique_id_strategy == 'verify':
                    raise ValueError('"verify" specified for image uniqueness, but ' +
                                     'image ID {} occurs more than once:\n\n{}\n\n{}'.format(
                                         image_id,image_fn,image_id_to_file[image_id]))
                else:
                    assert options.unique_id_strategy == 'auto'
                    image_ids_are_unique = False
                    # IDs will be re-assigned when we create links
                    image_id_to_file = {}
                    break

            image_id_to_file[image_id] = image_fn

        # ...for each image

        if image_ids_are_unique:

            print('"{}" specified for image uniqueness and images are unique, skipping links'.format(
                options.unique_id_strategy))
            assert len(image_id_to_file) == len(image_files_absolute)
            create_links = False

        else:

            assert options.unique_id_strategy == 'auto'
            create_links = True
            link_type = 'copies'
            if options.use_symlinks:
                link_type = 'links'
            print('"auto" specified for image uniqueness and images are not unique, defaulting to {}'.format(
                link_type))

    # ...which unique ID strategy?

    if create_links:

        if options.use_symlinks:
            print('Creating {} symlinks in {}'.format(len(image_files_absolute),symlink_folder_inner))
        else:
            print('Symlinks disabled, copying {} images to {}'.format(len(image_files_absolute),symlink_folder_inner))

        link_full_paths = []

        # i_image = 0; image_fn = image_files_absolute[i_image]
        for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):

            ext = os.path.splitext(image_fn)[1]
            image_fn_without_extension = os.path.splitext(image_fn)[0]

            # YOLO .json output identifies images by the base filename without the extension
            image_id = str(i_image).zfill(10)
            image_id_to_file[image_id] = image_fn
            symlink_name = image_id + ext
            symlink_full_path = os.path.join(symlink_folder_inner,symlink_name)
            link_full_paths.append(symlink_full_path)

            # If annotation files exist, link those too; only useful if we're reading the computed
            # mAP value, but it doesn't hurt.
            annotation_fn = image_fn_without_extension + '.txt'
            annotation_file_exists = False
            if os.path.isfile(annotation_fn):
                annotation_file_exists = True
                annotation_symlink_name = image_id + '.txt'
                annotation_symlink_full_path = os.path.join(symlink_folder_inner,annotation_symlink_name)

            try:

                if options.use_symlinks:
                    path_utils.safe_create_link(image_fn,symlink_full_path)
                    if annotation_file_exists:
                        path_utils.safe_create_link(annotation_fn,annotation_symlink_full_path)
                else:
                    shutil.copyfile(image_fn,symlink_full_path)
                    if annotation_file_exists:
                        shutil.copyfile(annotation_fn,annotation_symlink_full_path)

            except Exception as e:

                error_string = str(e)
                image_id_to_error[image_id] = error_string

                # Always break if the user is trying to create symlinks on Windows without
                # permission, 100% of images will always fail in this case.
                if ('a required privilege is not held by the client' in error_string.lower()) or \
                   (not options.treat_copy_failures_as_warnings):
                    print('\nError copying/creating link for input file {}: {}'.format(
                        image_fn,error_string))
                    raise
                else:
                    print('Warning: error copying/creating link for input file {}: {}'.format(
                        image_fn,error_string))
                    continue

            # ...except

        # ...for each image

    # ...if we need to create links/copies


    ##%% Create the dataset file if necessary

    # This may have been passed in as a string, but at this point, we should have
    # loaded the dataset file.
    assert isinstance(options.yolo_category_id_to_name,dict)

    # Category IDs need to be continuous integers starting at 0
    category_ids = sorted(list(options.yolo_category_id_to_name.keys()))
    assert category_ids[0] == 0
    assert len(category_ids) == 1 + category_ids[-1]

    yolo_dataset_file = os.path.join(yolo_results_folder,'dataset.yaml')
    yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')

    with open(yolo_image_list_file,'w') as f:

        if create_links:
            image_files_to_write = link_full_paths
        else:
            image_files_to_write = image_files_absolute

        for fn_abs in image_files_to_write:
            # At least in YOLOv5 val (need to verify for YOLOv8 val), filenames in this
            # text file are treated as relative to the text file itself if they start with
            # "./", otherwise they're treated as absolute paths.  Since we don't want to put this
            # text file in the image folder, we'll use absolute paths.
            # fn_relative = os.path.relpath(fn_abs,options.input_folder)
            # f.write(fn_relative + '\n')
            f.write(fn_abs + '\n')

    if create_links:
        inference_folder = symlink_folder_inner
    else:
        # This doesn't matter, but it has to be a valid path.  Use the resolved results
        # folder (which we created above), not options.yolo_results_folder, which is
        # None when the results folder lives in system temp space.
        inference_folder = yolo_results_folder

    with open(yolo_dataset_file,'w') as f:

        f.write('path: {}\n'.format(inference_folder))
        # These need to be valid paths, even if you're not using them, and "." is always safe
        f.write('train: .\n')
        f.write('val: .\n')
        f.write('test: {}\n'.format(yolo_image_list_file))
        f.write('\n')
        f.write('nc: {}\n'.format(len(options.yolo_category_id_to_name)))
        f.write('\n')
        f.write('names:\n')
        for category_id in category_ids:
            assert isinstance(category_id,int)
            f.write(' {}: {}\n'.format(category_id,
                                        options.yolo_category_id_to_name[category_id]))


    ##%% Prepare Python command or YOLO CLI command

    if options.image_size is None:
        if options.augment:
            image_size = default_image_size_with_augmentation
        else:
            image_size = default_image_size_with_no_augmentation
    else:
        image_size = options.image_size

    image_size_string = str(round(image_size))

    if options.model_type == 'yolov5':

        cmd = 'python val.py --task test --data "{}"'.format(yolo_dataset_file)
        cmd += ' --weights "{}"'.format(model_filename)
        cmd += ' --batch-size {} --imgsz {} --conf-thres {}'.format(
            options.batch_size,image_size_string,options.conf_thres)
        cmd += ' --device "{}" --save-json'.format(options.device_string)
        cmd += ' --project "{}" --name "{}" --exist-ok'.format(yolo_results_folder,'yolo_results')

        # This is the NMS IoU threshold
        # cmd += ' --iou-thres 0.6'

        if options.augment:
            cmd += ' --augment'

        # --half is a store_true argument for YOLOv5's val.py
        if (options.half_precision_enabled is not None) and (options.half_precision_enabled == 1):
            cmd += ' --half'

        # Sometimes useful for debugging
        # cmd += ' --save_conf --save_txt'

    elif options.model_type == 'ultralytics':

        if options.augment:
            augment_string = 'augment'
        else:
            augment_string = ''

        cmd = 'yolo val {} model="{}" imgsz={} batch={} data="{}" project="{}" name="{}" device="{}"'.\
            format(augment_string,model_filename,image_size_string,options.batch_size,
                   yolo_dataset_file,yolo_results_folder,'yolo_results',options.device_string)
        cmd += ' save_json exist_ok'

        if (options.half_precision_enabled is not None):
            if options.half_precision_enabled == 1:
                cmd += ' --half=True'
            else:
                assert options.half_precision_enabled == 0
                cmd += ' --half=False'

        # Sometimes useful for debugging
        # cmd += ' save_conf save_txt'

    else:

        raise ValueError('Unrecognized model type {}'.format(options.model_type))

    # print(cmd); import clipboard; clipboard.copy(cmd)


    ##%% Run YOLO command

    # If we change the working directory, we always change it back (in the "finally"
    # block below), including when we return early in preview mode and when
    # inference or console-output parsing fails.
    current_dir = None
    if options.yolo_working_folder is not None:
        current_dir = os.getcwd()
        os.chdir(options.yolo_working_folder)

    try:

        print('Running YOLO inference command:\n{}\n'.format(cmd))

        if options.preview_yolo_command_only:

            # Removing scratch folders is best-effort in preview mode
            if options.remove_symlink_folder:
                try:
                    print('Removing YOLO symlink folder {}'.format(symlink_folder))
                    shutil.rmtree(symlink_folder)
                except Exception:
                    print('Warning: error removing symlink folder {}'.format(symlink_folder))
            if options.remove_yolo_results_folder:
                try:
                    print('Removing YOLO results folder {}'.format(yolo_results_folder))
                    shutil.rmtree(yolo_results_folder)
                except Exception:
                    print('Warning: error removing YOLO results folder {}'.format(yolo_results_folder))

            # sys.exit()
            return

        execution_result = process_utils.execute_and_print(cmd,encoding='utf-8',verbose=True)
        assert execution_result['status'] == 0, 'Error running {}'.format(options.model_type)
        yolo_console_output = execution_result['output']

        if options.save_yolo_debug_output:

            with open(os.path.join(yolo_results_folder,'yolo_console_output.txt'),'w',encoding='utf-8') as f:
                for s in yolo_console_output:
                    f.write(s + '\n')
            ct_utils.write_json(os.path.join(yolo_results_folder,'image_id_to_file.json'),
                                image_id_to_file)
            ct_utils.write_json(os.path.join(yolo_results_folder,'image_id_to_error.json'),
                                image_id_to_error)

        # YOLO console output contains lots of ANSI escape codes, remove them for easier parsing
        yolo_console_output = [string_utils.remove_ansi_codes(s) for s in yolo_console_output]

        # Find errors that occurred during the initial corruption check; these will not be included in the
        # output.  Errors that occur during inference will be handled separately.
        yolo_read_failures = []

        for line in yolo_console_output:

            #
            # Lines indicating read failures look like:
            #
            # For ultralytics val:
            #
            # line = "val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'"
            #
            # For yolov5 val.py:
            #
            # line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
            #
            # In both cases, when we are using symlinks, the first filename is the symlink name, the
            # second filename is the target, e.g.:
            #
            # line = "test: WARNING: /tmp/md_to_yolo/md_to_yolo_xyz/symlinks/xyz/0000000004.jpg: ignoring corrupt image/label: cannot identify image file '/tmp/md-tests/md-test-images/corrupt-images/real-file.jpg'"
            #
            # Windows example:
            #
            # line = "test: WARNING: g:\\temp\\md-test-images\\corrupt-images\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg: ignoring corrupt image/label: cannot identify image file 'g:\\\\temp\\\\md-test-images\\\\corrupt-images\\\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg'"
            #

            # Make the ultralytics format look like the yolov5 format, i.e. make sure
            # "WARNING" is followed by a colon
            line = line.replace('⚠️',':')

            if 'ignoring corrupt image/label' in line:

                line_tokens = line.split('ignoring corrupt image/label')
                assert len(line_tokens) == 2

                tokens = line_tokens[0].split(':') # ,maxsplit=3)
                tokens = [s.strip() for s in tokens]

                # ['test', ' WARNING', ' a/b/c/d.jpg', ' ']
                assert len(tokens[-1]) == 0
                tokens = tokens[:-1]
                assert 'warning' in tokens[1].lower()

                if len(tokens) == 3:
                    image_name = tokens[2].strip()
                else:
                    # Windows filenames have one extra colon
                    assert len(tokens) == 4
                    assert len(tokens[2]) == 1
                    image_name = ':'.join(tokens[2:4])

                yolo_read_failures.append(image_name)

            # ...if this line indicated a corrupt image

        # ...for each line in the console output

        # image_file = yolo_read_failures[0]
        for image_file in yolo_read_failures:
            image_id = os.path.splitext(os.path.basename(image_file))[0]
            assert image_id in image_id_to_file, 'Unexpected image ID {}'.format(image_id)
            # Don't overwrite errors recorded before inference (e.g. copy failures)
            if image_id not in image_id_to_error:
                image_id_to_error[image_id] = 'YOLO read failure'

    finally:

        if current_dir is not None:
            os.chdir(current_dir)

    # ...try/finally (restoring the working directory)


    ##%% Convert results to MD format

    json_files = glob.glob(yolo_results_folder + '/yolo_results/*.json')
    assert len(json_files) == 1
    yolo_json_file = json_files[0]

    # Map YOLO image IDs to paths
    image_id_to_relative_path = {}
    for image_id in image_id_to_file:
        fn = image_id_to_file[image_id].replace('\\','/')
        assert path_is_abs(fn)
        if options.input_folder is not None:
            assert os.path.isdir(options.input_folder)
            assert options.input_folder in fn, 'Internal error: base folder {} not in file {}'.format(
                options.input_folder,fn)
            relative_path = os.path.relpath(fn,options.input_folder)
        else:
            # We'll use the absolute path as a relative path, and pass '/'
            # as the base path in this case.
            relative_path = fn
        image_id_to_relative_path[image_id] = relative_path

    # Are we working with a base folder?
    if options.input_folder is not None:
        assert os.path.isdir(options.input_folder)
        image_base = options.input_folder
    else:
        image_base = '/'

    yolo_output_to_md_output.yolo_json_output_to_md_output(
        yolo_json_file=yolo_json_file,
        image_folder=image_base,
        output_file=options.output_file,
        yolo_category_id_to_name=options.yolo_category_id_to_name,
        detector_name=os.path.basename(model_filename),
        image_id_to_relative_path=image_id_to_relative_path,
        image_id_to_error=image_id_to_error,
        offset_yolo_class_ids=options.offset_yolo_category_ids)


    ##%% Clean up

    _clean_up_temporary_folders(options,
                                symlink_folder,yolo_results_folder,
                                symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)
# ...def run_inference_with_yolo_val()


#%% Command-line driver

def main(): # noqa
    """
    Command-line entry point: parse arguments into a YoloInferenceOptions object
    and call run_inference_with_yolo_val().

    Prints help and exits if no arguments are supplied.  If --image_size is not
    specified, a default is chosen based on whether augmentation is enabled.  If
    the positional [input_folder] argument is a file rather than a folder, it is
    treated as the image filename list, and options.input_folder is set to None.
    """

    options = YoloInferenceOptions()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'model_filename',type=str,
        help='model file name')
    parser.add_argument(
        'input_folder',type=str,
        help='folder on which to recursively run the model, or a .json or .txt file ' + \
             'containing a list of absolute image paths')
    parser.add_argument(
        'output_file',type=str,
        help='.json file where output will be written')

    parser.add_argument(
        '--image_filename_list',type=str,default=None,
        help='.json or .txt file containing a list of relative image filenames within [input_folder]')
    parser.add_argument(
        '--yolo_working_folder',type=str,default=None,
        help='folder in which to execute val.py (not necessary for YOLOv8 inference)')
    parser.add_argument(
        '--image_size', default=None, type=int,
        help='image size for model execution (default {} when augmentation is enabled, else {})'.format(
            default_image_size_with_augmentation,default_image_size_with_no_augmentation))
    parser.add_argument(
        '--conf_thres', default=options.conf_thres, type=float,
        help='confidence threshold for including detections in the output file (default {})'.format(
            options.conf_thres))
    parser.add_argument(
        '--batch_size', default=options.batch_size, type=int,
        help='inference batch size (default {})'.format(options.batch_size))
    parser.add_argument(
        '--half_precision_enabled', default=None, type=int,
        help='use half-precision-inference (1 or 0) (default is the underlying model\'s default, ' + \
             'probably full for YOLOv8 and half for YOLOv5')
    parser.add_argument(
        '--device_string', default=options.device_string, type=str,
        help='CUDA device specifier, typically "0" or "1" for CUDA devices, "mps" for ' + \
             'M1/M2 devices, or "cpu" (default {})'.format(
            options.device_string))
    parser.add_argument(
        '--overwrite_handling', default=options.overwrite_handling, type=str,
        help='action to take if the output file exists (skip, error, overwrite) (default {})'.format(
            options.overwrite_handling))
    parser.add_argument(
        '--yolo_dataset_file', default=None, type=str,
        help='YOLOv5 dataset.yaml file from which we should load category information ' + \
             '(otherwise defaults to MD categories)')
    parser.add_argument(
        '--model_type', default=options.model_type, type=str,
        help='model type ("yolov5" or "ultralytics" ("yolov8" behaves the same as "ultralytics")) (default {})'.format(
            options.model_type))

    parser.add_argument('--unique_id_strategy', default=options.unique_id_strategy, type=str,
        help='how should we ensure that unique filenames are passed to the YOLO val script, ' + \
             'can be "verify", "auto", or "links", see options class docs for details (default {})'.format(
            options.unique_id_strategy))
    parser.add_argument(
        '--symlink_folder', default=None, type=str,
        help='temporary folder for symlinks (defaults to a folder in the system temp dir)')
    parser.add_argument(
        '--yolo_results_folder', default=None, type=str,
        help='temporary folder for YOLO intermediate output (defaults to a folder in the system temp dir)')
    parser.add_argument(
        '--no_use_symlinks', action='store_true',
        help='copy files instead of creating symlinks when preparing the yolo input folder')
    parser.add_argument(
        '--no_remove_symlink_folder', action='store_true',
        help='don\'t remove the temporary folder full of symlinks')
    parser.add_argument(
        '--no_remove_yolo_results_folder', action='store_true',
        help='don\'t remove the temporary folder full of YOLO intermediate files')
    parser.add_argument(
        '--save_yolo_debug_output', action='store_true',
        help='write yolo console output to a text file in the results folder, along with additional debug files')
    parser.add_argument(
        '--checkpoint_frequency', default=options.checkpoint_frequency, type=int,
        help='break the job into chunks with no more than this many images (default {})'.format(
            options.checkpoint_frequency))
    parser.add_argument(
        '--no_append_job_id_to_symlink_folder', action='store_true',
        help="don't append a unique job ID to the symlink folder name")
    parser.add_argument(
        '--nonrecursive', action='store_true',
        help='disable recursive folder processing')
    parser.add_argument(
        '--no_offset_class_ids', action='store_true',
        help='disable class ID offsetting')
    parser.add_argument(
        '--preview_yolo_command_only', action='store_true',
        help='don\'t run inference, just preview the YOLO inference command (still creates symlinks)')

    # Augmentation is exposed on the command line as an integer (1/0), with the
    # default taken from the options class
    if options.augment:
        default_augment_enabled = 1
    else:
        default_augment_enabled = 0

    parser.add_argument(
        '--augment_enabled', default=default_augment_enabled, type=int,
        help='enable/disable augmentation (default {})'.format(default_augment_enabled))

    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # If the caller hasn't specified an image size, choose one based on whether augmentation
    # is enabled.
    if args.image_size is None:
        assert args.augment_enabled in (0,1), \
            'Illegal augment_enabled value {}'.format(args.augment_enabled)
        if args.augment_enabled == 1:
            args.image_size = default_image_size_with_augmentation
        else:
            args.image_size = default_image_size_with_no_augmentation
        augment_enabled_string = 'enabled'
        if not args.augment_enabled:
            augment_enabled_string = 'disabled'
        print('Augmentation is {}, using default image size {}'.format(
            augment_enabled_string,args.image_size))

    # The CLI-only attributes read back from [options] below (e.g. options.nonrecursive)
    # are expected to have been copied over from [args] by this call.
    args_to_object(args, options)

    if args.yolo_dataset_file is not None:
        options.yolo_category_id_to_name = args.yolo_dataset_file

    # The function convention is that input_folder should be None when we want to use a list of
    # absolute paths, but the CLI convention is that the required argument is always valid, whether
    # it's a folder or a list of absolute paths.
    if os.path.isfile(options.input_folder):
        assert options.image_filename_list is None, \
            'image_filename_list should not be specified when input_folder is a file'
        options.image_filename_list = options.input_folder
        options.input_folder = None

    # Translate the CLI's negated/integer flags into the positive boolean options
    options.recursive = (not options.nonrecursive)
    options.append_job_id_to_symlink_folder = (not options.no_append_job_id_to_symlink_folder)
    options.remove_symlink_folder = (not options.no_remove_symlink_folder)
    options.remove_yolo_results_folder = (not options.no_remove_yolo_results_folder)
    options.use_symlinks = (not options.no_use_symlinks)
    options.augment = (options.augment_enabled > 0)
    options.offset_yolo_category_ids = (not options.no_offset_class_ids)

    # Remove the CLI-only attributes that were translated above, so they don't linger
    # on the options object (e.g. in the printout below)
    del options.nonrecursive
    del options.no_append_job_id_to_symlink_folder
    del options.no_remove_symlink_folder
    del options.no_remove_yolo_results_folder
    del options.no_use_symlinks
    del options.augment_enabled
    del options.yolo_dataset_file
    del options.no_offset_class_ids

    print(options.__dict__)

    run_inference_with_yolo_val(options)

if __name__ == '__main__':
    main()


#%% Interactive driver

# Scratch cells for interactive (e.g. IDE cell-by-cell) use; never executed when
# this module is imported or run as a script.
if False:

    #%% Debugging

    input_folder = r'g:\temp\md-test-images'
    model_filename = 'MDV5A'
    output_folder = r'g:\temp\yolo-test-out'
    yolo_working_folder = r'c:\git\yolov5-md'
    dataset_file = r"g:\temp\md-test-images\dataset.yaml"
    job_name = 'yolo-debug'
    symlink_folder = os.path.join(output_folder,'symlinks')
    yolo_results_folder = os.path.join(output_folder,'yolo_results')

    model_name = os.path.splitext(os.path.basename(model_filename))[0]
    output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
        job_name,model_name))

    options = YoloInferenceOptions()

    options.yolo_working_folder = yolo_working_folder
    options.input_folder = input_folder
    options.output_file = output_file

    options.yolo_category_id_to_name = dataset_file
    options.augment = False
    options.conf_thres = '0.001'
    options.batch_size = 1
    options.device_string = '0'
    options.unique_id_strategy = 'auto'
    options.overwrite_handling = 'overwrite'

    if options.augment:
        options.image_size = round(1280 * 1.3)
    else:
        options.image_size = 1280

    options.model_filename = model_filename

    options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
    options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
    options.use_symlinks = False

    options.remove_symlink_folder = True
    options.remove_yolo_results_folder = True

    options.checkpoint_frequency = None

    # Build the equivalent command line, for running outside the interactive session
    cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
        f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
        f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
        f' --batch_size {options.batch_size} ' + \
        f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
        f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
        f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'

    if not options.remove_symlink_folder:
        cmd += ' --no_remove_symlink_folder'
    if not options.remove_yolo_results_folder:
        cmd += ' --no_remove_yolo_results_folder'
    if options.checkpoint_frequency is not None:
        cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
    if not options.use_symlinks:
        cmd += ' --no_use_symlinks'
    if not options.augment:
        cmd += ' --augment_enabled 0'

    print(cmd)
    execute_in_python = False
    if execute_in_python:
        run_inference_with_yolo_val(options)
    else:
        import clipboard; clipboard.copy(cmd)


    #%% Run inference on a folder

    input_folder = r'g:\temp\tegu-val-mini'.replace('\\','/')
    model_filename = r'g:\temp\usgs-tegus-yolov5x-231003-b8-img1280-e3002-best.pt'
    output_folder = r'g:\temp\tegu-scratch'
    yolo_working_folder = r'c:\git\yolov5-tegus'
    dataset_file = r'g:\temp\dataset.yaml'

    # This only impacts the output file name, it's not passed to the inference function
    job_name = 'yolo-inference-test'

    model_name = os.path.splitext(os.path.basename(model_filename))[0]
    symlink_folder = os.path.join(output_folder,'symlinks')
    yolo_results_folder = os.path.join(output_folder,'yolo_results')

    output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
        job_name,model_name))

    options = YoloInferenceOptions()

    options.yolo_working_folder = yolo_working_folder
    options.input_folder = input_folder
    options.output_file = output_file

    pass_image_filename_list = False
    pass_relative_paths = True

    if pass_image_filename_list:
        if pass_relative_paths:
            options.image_filename_list = [
                r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
                r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
                ]
        else:
            options.image_filename_list = [
                r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
                r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
                ]
    else:
        options.image_filename_list = None

    options.yolo_category_id_to_name = dataset_file
    options.augment = False
    options.conf_thres = '0.001'
    options.batch_size = 1
    options.device_string = '0'
    options.unique_id_strategy = 'auto'
    options.overwrite_handling = 'overwrite'

    if options.augment:
        options.image_size = round(1280 * 1.3)
    else:
        options.image_size = 1280

    options.model_filename = model_filename

    options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
    options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
    options.use_symlinks = False

    options.remove_symlink_folder = True
    options.remove_yolo_results_folder = True

    options.checkpoint_frequency = 5

    # Build the equivalent command line, for running outside the interactive session
    cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
        f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
        f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
        f' --batch_size {options.batch_size} ' + \
        f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
        f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
        f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'

    if not options.remove_symlink_folder:
        cmd += ' --no_remove_symlink_folder'
    if not options.remove_yolo_results_folder:
        cmd += ' --no_remove_yolo_results_folder'
    if options.checkpoint_frequency is not None:
        cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
    if not options.use_symlinks:
        cmd += ' --no_use_symlinks'
    if not options.augment:
        cmd += ' --augment_enabled 0'

    print(cmd)
    execute_in_python = False
    if execute_in_python:
        run_inference_with_yolo_val(options)
    else:
        import clipboard; clipboard.copy(cmd)