Source code for megadetector.visualization.visualize_detector_output

"""

visualize_detector_output.py

Render images with bounding boxes annotated on them to a folder, based on a
detector output result file (.json), optionally writing an HTML index file.

"""

#%% Imports

import argparse
import os
import random
import sys

from multiprocessing.pool import ThreadPool
from multiprocessing.pool import Pool
from functools import partial
from tqdm import tqdm

from megadetector.data_management.annotations.annotation_constants import detector_bbox_category_id_to_name
from megadetector.detection.run_detector import get_typical_confidence_threshold_from_results
from megadetector.utils.ct_utils import get_max_conf
from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
from megadetector.utils import write_html_image_list
from megadetector.utils.path_utils import path_is_abs
from megadetector.utils.path_utils import open_file
from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
from megadetector.visualization import visualization_utils as vis_utils
from megadetector.visualization.visualization_utils import \
    blur_detections, DEFAULT_BOX_THICKNESS, DEFAULT_LABEL_FONT_SIZE

# Default ordering scheme for rendered detection boxes; used as the default value
# of the [box_sort_order] argument by the rendering functions in this module.
default_box_sort_order = 'confidence'


#%% Constants

# Fallback mapping from (string) detection category IDs to category names; only
# used when a results file doesn't provide its own category mapping.
DEFAULT_DETECTOR_LABEL_MAP = {
    str(category_id): category_name
    for category_id, category_name in detector_bbox_category_id_to_name.items()
}


#%% Support functions

def _render_image(entry,
                  detector_label_map,
                  classification_label_map,
                  confidence_threshold,
                  classification_confidence_threshold,
                  render_detections_only,
                  preserve_path_structure,
                  out_dir,
                  images_dir,
                  output_image_width,
                  box_sort_order=default_box_sort_order,
                  category_names_to_blur=None,
                  box_thickness=DEFAULT_BOX_THICKNESS,
                  box_expansion=0,
                  label_font_size=DEFAULT_LABEL_FONT_SIZE,
                  label_font='arial.ttf'):
    """
    Internal function for rendering a single image.

    Args:
        entry (dict): a single image record; must contain 'file', and either a non-None
            'failure' field or a non-None 'detections' list
        detector_label_map (dict): mapping from detection category IDs to names, passed
            to the box renderer as [label_map]; can be None, but then
            [category_names_to_blur] can't be resolved
        classification_label_map (dict): mapping from classification category IDs to
            names, passed through to the box renderer; can be None
        confidence_threshold (float): detections at or above this confidence are
            eligible for blurring; also passed through to the box renderer
        classification_confidence_threshold (float): passed through to the box renderer
        render_detections_only (bool): skip images whose maximum confidence is below
            [confidence_threshold]
        preserve_path_structure (bool): if True, write to [out_dir]/[file]; if False,
            write to a flattened, 'anno_'-prefixed name directly in [out_dir]
        out_dir (str): folder to which the annotated image is written
        images_dir (str): base folder for relative filenames; if None, entry['file']
            must be an absolute path
        output_image_width (int): passed to resize_image() before boxes are drawn
        box_sort_order (str, optional): passed through to the box renderer
        category_names_to_blur (str or list of str, optional): names of categories whose
            above-threshold detections are blurred before rendering
        box_thickness (int or float, optional): passed through to the box renderer
        box_expansion (int or float, optional): passed through to the box renderer
        label_font_size (float, optional): passed through to the box renderer
        label_font (str, optional): passed through to the box renderer

    Returns:
        dict: rendering status with keys 'failed_image', 'missing_image',
        'skipped_image', 'annotated_image_path' (None if nothing was written),
        'max_conf', 'image_filename_in_abs', and 'file'

    Raises:
        ValueError: if blurring was requested by category name, but
            [detector_label_map] is None
    """

    rendering_result = {'failed_image':False,
                        'missing_image':False,
                        'skipped_image':False,
                        'annotated_image_path':None,
                        'max_conf':None,
                        'image_filename_in_abs':None,
                        'file':entry['file']}

    image_id = entry['file']

    # Entries with a recorded failure carry no detections, so there's nothing to render
    if 'failure' in entry and entry['failure'] is not None:
        rendering_result['failed_image'] = True
        return rendering_result

    assert 'detections' in entry and entry['detections'] is not None

    max_conf = get_max_conf(entry)
    rendering_result['max_conf'] = max_conf

    if (max_conf < confidence_threshold) and render_detections_only:
        rendering_result['skipped_image'] = True
        return rendering_result

    if images_dir is None:
        image_filename_in_abs = image_id
        assert path_is_abs(image_filename_in_abs), \
            'Absolute paths are required when no image base dir is supplied'
    else:
        assert not path_is_abs(image_id), \
            'Relative paths are required when an image base dir is supplied'
        image_filename_in_abs = os.path.join(images_dir, image_id)
    if not os.path.exists(image_filename_in_abs):
        print(f'Image {image_id} not found')
        rendering_result['missing_image'] = True
        return rendering_result

    rendering_result['image_filename_in_abs'] = image_filename_in_abs

    # Load the image
    image = vis_utils.open_image(image_filename_in_abs)

    # Find categories we're supposed to blur
    if isinstance(category_names_to_blur,str):
        category_names_to_blur = [category_names_to_blur]

    category_ids_to_blur = set()
    if category_names_to_blur:
        if detector_label_map is None:
            # Without a label map, names can't be translated to category IDs.  Fail
            # explicitly, rather than with an opaque TypeError or (worse) by silently
            # writing an un-blurred image.
            raise ValueError(
                'Blurring categories {} requires a detector label map, but none is available'.format(
                    category_names_to_blur))
        category_ids_to_blur = {category_id
                                for category_id, category_name in detector_label_map.items()
                                if category_name in category_names_to_blur}

    detections_to_blur = [d for d in entry['detections']
                          if d['conf'] >= confidence_threshold and \
                              d['category'] in category_ids_to_blur]
    if len(detections_to_blur) > 0:
        blur_detections(image,detections_to_blur)

    # Resize if necessary
    #
    # If output_image_width is -1 or None, this will just return the original image
    image = vis_utils.resize_image(image, output_image_width)

    vis_utils.render_detection_bounding_boxes(
        entry['detections'], image,
        label_map=detector_label_map,
        classification_label_map=classification_label_map,
        confidence_threshold=confidence_threshold,
        classification_confidence_threshold=classification_confidence_threshold,
        box_sort_order=box_sort_order,
        thickness=box_thickness,
        expansion=box_expansion,
        label_font_size=label_font_size,
        label_font=label_font)

    if not preserve_path_structure:
        # Flatten the relative path into a single filename by replacing path
        # separators (and drive-letter colons)
        for char in ['/', '\\', ':']:
            image_id = image_id.replace(char, '~')
        annotated_img_path = os.path.join(out_dir, f'anno_{image_id}')
    else:
        assert not os.path.isabs(image_id), "Can't preserve paths when operating on absolute paths"
        annotated_img_path = os.path.join(out_dir, image_id)
        os.makedirs(os.path.dirname(annotated_img_path),exist_ok=True)

    image.save(annotated_img_path)
    rendering_result['annotated_image_path'] = annotated_img_path

    return rendering_result

# ...def _render_image(...)


#%% Main function

def visualize_detector_output(detector_output_path,
                              out_dir,
                              images_dir=None,
                              confidence_threshold=0.15,
                              sample=-1,
                              output_image_width=1000,
                              random_seed=0,
                              render_detections_only=False,
                              classification_confidence_threshold=0.1,
                              html_output_file=None,
                              html_output_options=None,
                              preserve_path_structure=False,
                              parallelize_rendering=True,
                              parallelize_rendering_n_cores=10,
                              parallelize_rendering_with_threads=True,
                              box_sort_order=default_box_sort_order,
                              category_names_to_blur=None,
                              link_images_to_originals=False,
                              detector_label_map=None,
                              box_thickness=DEFAULT_BOX_THICKNESS,
                              box_expansion=0,
                              label_font_size=DEFAULT_LABEL_FONT_SIZE,
                              label_font='arial.ttf'):
    """
    Draws bounding boxes on images given the output of a detector.

    Args:
        detector_output_path (str): path to detector output .json file, or a loaded MD results
            dict
        out_dir (str): path to directory for saving annotated images
        images_dir (str, optional): folder where the images live; filenames in
            [detector_output_path] should be relative to [images_dir].  Can be None if paths are
            absolute.
        confidence_threshold (float, optional): threshold above which detections will be rendered;
            if None, a typical threshold is chosen based on the results file
        sample (int, optional): maximum number of images to render, -1 for all
        output_image_width (int, optional): width in pixels to resize images for display,
            preserving aspect ratio; set to -1 to use original image width
        random_seed (int, optional): seed to use for choosing images when sample != -1, use None
            to avoid forcing a seed
        render_detections_only (bool, optional): only render images with above-threshold
            detections. Empty images are discarded after sampling, so if you want to see, e.g.,
            1000 non-empty images, you can set [render_detections_only], but you need to sample
            more than 1000 images.
        classification_confidence_threshold (float, optional): only show classifications above
            this threshold; does not impact whether images are rendered, only whether
            classification labels (not detection categories) are displayed
        html_output_file (str, optional): output path for an HTML index file (not written if None)
        html_output_options (dict, optional): HTML formatting options; see write_html_image_list
            for details.  The most common option you may want to supply here is
            'maxFiguresPerHtmlFile'.
        preserve_path_structure (bool, optional): if False (default), writes images to unique
            names in a flat structure in the output folder; if True, preserves relative paths
            within the output folder
        parallelize_rendering (bool, optional): whether to use concurrent workers for rendering
        parallelize_rendering_n_cores (int, optional): number of concurrent workers to use
            (ignored if parallelize_rendering is False)
        parallelize_rendering_with_threads (bool, optional): determines whether we use
            threads (True) or processes (False) for parallelization (ignored if
            parallelize_rendering is False)
        box_sort_order (str, optional): sorting scheme for detection boxes, can be None,
            "confidence", or "reverse_confidence"
        category_names_to_blur (list of str, optional): category names for which we should blur
            detections, most commonly ['person']
        link_images_to_originals (bool, optional): include a link from every rendered image
            back to the corresponding original image
        detector_label_map (dict, optional): mapping from category IDs to labels; by default
            (None) uses the values in the detector file.  If this is the string
            'no_detection_labels', hides labels.
        box_thickness (int or float, optional): box thickness in pixels.  If this is a float
            less than 1.0, it's treated as a fraction of the image width.
        box_expansion (int or float , optional): box expansion in pixels.  If this is a float
            less than 1.0, it's treated as a fraction of the image width.
        label_font_size (float, optional): label font size in pixels.  If this is a float
            less than 1.0, it's treated as a fraction of the image width.
        label_font (str, optional): font filename to use for label text (default 'arial.ttf')

    Returns:
        list: list of paths to annotated images
    """

    if isinstance(detector_output_path,str):
        assert os.path.exists(detector_output_path), \
            'Detector output file does not exist at {}'.format(detector_output_path)
    else:
        assert isinstance(detector_output_path,dict), \
            'detector_output_path is neither a filename nor a results dict'

    if images_dir is not None:
        assert os.path.isdir(images_dir), \
            'Image folder {} is not available'.format(images_dir)

    os.makedirs(out_dir, exist_ok=True)


    ##%% Load detector output

    if isinstance(detector_output_path,dict):
        detector_output = detector_output_path
    else:
        detector_output = load_md_or_speciesnet_file(detector_output_path)

    images = detector_output['images']

    if confidence_threshold is None:
        confidence_threshold = get_typical_confidence_threshold_from_results(detector_output)

    # Both endpoints are accepted, so the message reports a closed interval
    assert confidence_threshold >= 0 and confidence_threshold <= 1, \
        f'Confidence threshold {confidence_threshold} is invalid, must be in [0, 1].'

    # Resolve the detection label map, in order of precedence: the "hide labels"
    # sentinel string, a caller-supplied dict, the mapping in the results file, and
    # finally the default mapping.
    if isinstance(detector_label_map,str):
        assert detector_label_map == 'no_detection_labels', \
            'Unrecognized detection label string {}'.format(detector_label_map)
        detector_label_map = None
    elif detector_label_map is not None:
        assert isinstance(detector_label_map,dict), \
            'Invalid detector label maps'
    elif 'detection_categories' in detector_output:
        detector_label_map = detector_output['detection_categories']
    else:
        detector_label_map = DEFAULT_DETECTOR_LABEL_MAP

    num_images = len(images)
    print(f'Detector output file contains {num_images} entries.')

    if (sample is not None) and (sample > 0) and (num_images > sample):

        if random_seed is not None:
            # Sorting first makes the seeded shuffle independent of the input order
            images = sorted(images, key=lambda x: x['file'])
            random.seed(random_seed)
        else:
            # Shuffle a copy, so we don't reorder the caller's list in place when
            # a results dict was passed in
            images = list(images)

        random.shuffle(images)
        images = sorted(images[:sample], key=lambda x: x['file'])
        print(f'Sampled {len(images)} entries from the detector output file.')


    ##%% Load images, annotate them and save

    print('Rendering detections above a confidence threshold of {}'.format(
        confidence_threshold))

    classification_label_map = None

    if 'classification_categories' in detector_output:
        classification_label_map = detector_output['classification_categories']

    # The same per-image call is used for both the serial and parallel paths
    render_fn = partial(_render_image,
                        detector_label_map=detector_label_map,
                        classification_label_map=classification_label_map,
                        confidence_threshold=confidence_threshold,
                        classification_confidence_threshold=classification_confidence_threshold,
                        render_detections_only=render_detections_only,
                        preserve_path_structure=preserve_path_structure,
                        out_dir=out_dir,
                        images_dir=images_dir,
                        output_image_width=output_image_width,
                        box_sort_order=box_sort_order,
                        category_names_to_blur=category_names_to_blur,
                        box_thickness=box_thickness,
                        box_expansion=box_expansion,
                        label_font_size=label_font_size,
                        label_font=label_font)

    rendering_results = []

    if parallelize_rendering:

        if parallelize_rendering_with_threads:
            worker_string = 'threads'
            pool_class = ThreadPool
        else:
            worker_string = 'processes'
            pool_class = Pool

        pool = None
        try:
            # Passing None for the worker count is equivalent to omitting it
            pool = pool_class(parallelize_rendering_n_cores)
            if parallelize_rendering_n_cores is not None:
                print('Rendering images with {} {}'.format(parallelize_rendering_n_cores,
                                                           worker_string))
            rendering_results = list(tqdm(pool.imap(render_fn, images),
                                          total=len(images)))
        finally:
            if pool is not None:
                pool.close()
                pool.join()
                print('Pool closed and joined for detector output visualization')

    else:

        for entry in tqdm(images):
            rendering_results.append(render_fn(entry))

        # ...for each image

    failed_images = [r for r in rendering_results if r['failed_image']]
    missing_images = [r for r in rendering_results if r['missing_image']]
    skipped_images = [r for r in rendering_results if r['skipped_image']]

    print('Skipped {} failed images (of {})'.format(len(failed_images),len(images)))
    print('Skipped {} missing images (of {})'.format(len(missing_images),len(images)))
    print('Skipped {} below-threshold images (of {})'.format(len(skipped_images),len(images)))

    print(f'Rendered detection results to {out_dir}')

    annotated_image_paths = [r['annotated_image_path'] for r in rendering_results if \
                             r['annotated_image_path'] is not None]

    if html_output_file is not None:

        html_dir = os.path.dirname(html_output_file)

        html_image_info = []

        for r in rendering_results:

            # Images that weren't written don't appear in the index
            if r['annotated_image_path'] is None:
                assert r['failed_image'] or r['missing_image'] or r['skipped_image']
                continue

            # Image paths in the index are relative to the folder containing the HTML file
            annotated_image_path_relative = os.path.relpath(r['annotated_image_path'],html_dir)

            d = {}
            d['filename'] = annotated_image_path_relative
            # For sorting
            d['filename_lower'] = annotated_image_path_relative.lower()
            d['imageStyle'] = 'max-width:95%;'
            d['textStyle'] = \
                'font-family:verdana,arial,calibri;font-size:80%;' + \
                'text-align:left;margin-top:20;margin-bottom:5'
            d['title'] = '{} (max conf: {})'.format(r['file'],r['max_conf'])
            if link_images_to_originals:
                d['linkTarget'] = r['image_filename_in_abs']
            html_image_info.append(d)

        # ...for each rendering result

        html_image_info = sort_list_of_dicts_by_key(html_image_info,'filename_lower')

        _ = write_html_image_list.write_html_image_list(html_output_file,
                                                        html_image_info,
                                                        options=html_output_options)

    # ...if we're supposed to write HTML info

    return annotated_image_paths
# ...def visualize_detector_output(...)


#%% Command-line driver

def main(): # noqa
    """
    Command-line entry point: parses arguments, renders annotated images via
    visualize_detector_output(), and optionally opens the HTML index when done.
    """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Annotate the bounding boxes predicted by a detector above '
                    'some confidence threshold, and save the annotated images.')
    parser.add_argument(
        'detector_output_path', type=str,
        help='Path to json output file of the detector')
    parser.add_argument(
        'out_dir', type=str,
        help='Path to directory where the annotated images will be saved. '
             'The directory will be created if it does not exist.')
    parser.add_argument(
        '--confidence', type=float, default=0.15,
        help='Value between 0 and 1, indicating the confidence threshold '
             'above which to visualize bounding boxes')
    parser.add_argument(
        '--images_dir', type=str, default=None,
        help='Path to a local directory where images are stored. This '
             'serves as the root directory for image paths in '
             'detector_output_path. Omit if image paths are absolute.')
    parser.add_argument(
        '--sample', type=int, default=-1,
        help='Number of images to be annotated and rendered. Set to -1 '
             '(default) to annotate all images in the detector output file. '
             'There may be fewer images if some are not found in images_dir.')
    parser.add_argument(
        '--output_image_width', type=int, default=1000,
        help='Integer, desired width in pixels of the output annotated images. '
             'Use -1 to not resize. Default: 1000.')
    parser.add_argument(
        '--random_seed', type=int, default=None,
        help='Integer, for deterministic order of image sampling')
    parser.add_argument(
        '--html_output_file', type=str, default=None,
        help='Filename to which we should write an HTML image index (off by default)')
    parser.add_argument(
        '--open_html_output_file', action='store_true',
        help='Open the .html output file when done')
    parser.add_argument(
        '--detections_only', action='store_true',
        help='Only render images with above-threshold detections (by default, '
             'both empty and non-empty images are rendered).')
    parser.add_argument(
        '--preserve_path_structure', action='store_true',
        help='Preserve relative image paths (otherwise flattens and assigns unique file names)')
    parser.add_argument(
        '--category_names_to_blur', default=None, type=str,
        help='Comma-separated list of category names to blur (or a single category name, typically "person")')
    parser.add_argument(
        '--classification_confidence', type=float, default=0.3,
        help='If classification results are present, render results above this threshold')
    parser.add_argument(
        '--box_thickness', type=float, default=DEFAULT_BOX_THICKNESS,
        help='Line thickness in pixels for box rendering. If this is less than 1.0, '
             'it is treated as a fraction of the image width.')
    parser.add_argument(
        '--box_expansion', type=float, default=0,
        help='Number of pixels to expand bounding boxes on each side. If this is less than 1.0, '
             'it is treated as a fraction of the image width.')
    parser.add_argument(
        '--label_font_size', type=float, default=DEFAULT_LABEL_FONT_SIZE,
        help='Font size in pixels for detection labels. If this is less than 1.0, '
             'it is treated as a fraction of the image width.')
    parser.add_argument(
        '--label_font', type=str, default='arial.ttf',
        help='Font filename to use for label text (default arial.ttf).')

    # With no arguments at all, show usage rather than an argparse error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    category_names_to_blur = args.category_names_to_blur
    if category_names_to_blur is not None:
        # Tolerate whitespace around commas (e.g. "person, vehicle") and stray
        # commas; un-stripped names would never match a category name.
        category_names_to_blur = \
            [s.strip() for s in category_names_to_blur.split(',') if s.strip()]
        if len(category_names_to_blur) == 0:
            category_names_to_blur = None

    visualize_detector_output(
        detector_output_path=args.detector_output_path,
        out_dir=args.out_dir,
        confidence_threshold=args.confidence,
        images_dir=args.images_dir,
        sample=args.sample,
        output_image_width=args.output_image_width,
        random_seed=args.random_seed,
        render_detections_only=args.detections_only,
        classification_confidence_threshold=args.classification_confidence,
        preserve_path_structure=args.preserve_path_structure,
        html_output_file=args.html_output_file,
        category_names_to_blur=category_names_to_blur,
        box_thickness=args.box_thickness,
        box_expansion=args.box_expansion,
        label_font_size=args.label_font_size,
        label_font=args.label_font)

    if (args.html_output_file is not None) and args.open_html_output_file:
        print('Opening output file {}'.format(args.html_output_file))
        open_file(args.html_output_file)

if __name__ == '__main__':
    main()


#%% Interactive driver

# Scratch cell for running interactively (e.g. in an IDE that recognizes "#%%"
# cells); never executed on import or from the command line.
if False:

    pass

    #%%

    detector_output_path = os.path.expanduser('detections.json')
    out_dir = r'g:\temp\preview'
    images_dir = r'g:\camera_traps\camera_trap_images'
    confidence_threshold = 0.15
    sample = 50
    output_image_width = 1000
    random_seed = 1
    render_detections_only = True
    classification_confidence_threshold = 0.1
    html_output_file = os.path.join(out_dir,'index.html')
    html_output_options = None
    preserve_path_structure = False
    parallelize_rendering = True
    parallelize_rendering_n_cores = 10
    parallelize_rendering_with_threads = False

    _ = visualize_detector_output(detector_output_path,
                                  out_dir,
                                  images_dir,
                                  confidence_threshold,
                                  sample,
                                  output_image_width,
                                  random_seed,
                                  render_detections_only,
                                  classification_confidence_threshold,
                                  html_output_file,
                                  html_output_options,
                                  preserve_path_structure,
                                  parallelize_rendering,
                                  parallelize_rendering_n_cores,
                                  parallelize_rendering_with_threads)

    from megadetector.utils.path_utils import open_file
    open_file(html_output_file)