# Source code for megadetector.visualization.visualize_detector_output

"""

visualize_detector_output.py

Render images with bounding boxes annotated on them to a folder, based on a
detector output result file (.json), optionally writing an HTML index file.

"""

#%% Imports

import argparse
import os
import random
import sys

from multiprocessing.pool import ThreadPool
from multiprocessing.pool import Pool
from functools import partial
from tqdm import tqdm

from megadetector.data_management.annotations.annotation_constants import detector_bbox_category_id_to_name
from megadetector.detection.run_detector import get_typical_confidence_threshold_from_results
from megadetector.utils.ct_utils import get_max_conf
from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
from megadetector.utils import write_html_image_list
from megadetector.utils.path_utils import path_is_abs
from megadetector.utils.path_utils import open_file
from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
from megadetector.visualization import visualization_utils as vis_utils
from megadetector.visualization.visualization_utils import \
    blur_detections, DEFAULT_BOX_THICKNESS, DEFAULT_LABEL_FONT_SIZE

# Default value for the [box_sort_order] argument of the rendering functions in this
# module; per the visualize_detector_output() docstring, valid values are None,
# "confidence", or "reverse_confidence".
default_box_sort_order = 'confidence'


#%% Constants

# Fallback mapping from (string) detection category ID to category name; only used
# when the results file does not supply its own category mapping and the caller
# does not pass one.
DEFAULT_DETECTOR_LABEL_MAP = {
    str(category_id): category_name
    for category_id, category_name in detector_bbox_category_id_to_name.items()
}


#%% Support functions

def _render_image(entry,
                  detector_label_map,
                  classification_label_map,
                  confidence_threshold,
                  classification_confidence_threshold,
                  render_detections_only,
                  preserve_path_structure,
                  out_dir,
                  images_dir,
                  output_image_width,
                  box_sort_order=default_box_sort_order,
                  category_names_to_blur=None,
                  box_thickness=DEFAULT_BOX_THICKNESS,
                  box_expansion=0,
                  label_font_size=DEFAULT_LABEL_FONT_SIZE,
                  label_font='arial.ttf'):
    """
    Internal function for rendering a single image.

    Locates the image referred to by [entry], optionally blurs above-threshold
    detections whose category name is in [category_names_to_blur], resizes the image,
    draws detection boxes on it, and saves the result under [out_dir].  Errors raised
    while opening or rendering the image are caught, printed, and reported through
    flags in the returned dict rather than propagated.

    Args:
        entry (dict): one element of the results 'images' list; must contain 'file',
            and must contain a non-None 'detections' unless 'failure' is non-None
        detector_label_map (dict): detection category ID to name mapping, forwarded to
            the box renderer and used to resolve [category_names_to_blur].
            NOTE(review): if this is None while [category_names_to_blur] is supplied,
            the lookup raises inside the try block and the image is flagged as a
            rendering failure; confirm whether that is intended.
        classification_label_map (dict): classification category mapping, forwarded
            to the box renderer
        confidence_threshold (float): detection confidence threshold used for
            skipping, blurring, and box rendering
        classification_confidence_threshold (float): forwarded to the box renderer
        render_detections_only (bool): skip images whose maximum confidence is below
            [confidence_threshold]
        preserve_path_structure (bool): if True, write to [out_dir]/[relative path];
            otherwise write to a flat 'anno_'-prefixed name with path separators
            replaced by '~'
        out_dir (str): output folder
        images_dir (str): base folder for relative image paths, or None if the paths
            in [entry] are absolute
        output_image_width (int): target width passed to vis_utils.resize_image
        box_sort_order (str, optional): forwarded to the box renderer
        category_names_to_blur (str or list of str, optional): category name(s) to blur
        box_thickness (int or float, optional): forwarded to the box renderer
        box_expansion (int or float, optional): forwarded to the box renderer
        label_font_size (float, optional): forwarded to the box renderer
        label_font (str, optional): forwarded to the box renderer

    Returns:
        dict: status record with keys 'failed_image', 'missing_image', 'skipped_image',
        'annotated_image_path', 'max_conf', 'image_filename_in_abs', 'open_failure',
        'rendering_failure', and 'file'
    """

    result = {'failed_image':False,
              'missing_image':False,
              'skipped_image':False,
              'annotated_image_path':None,
              'max_conf':None,
              'image_filename_in_abs':None,
              'open_failure':None,
              'rendering_failure':None,
              'file':entry['file']}

    image_id = entry['file']

    # Entries that already carry a failure have nothing to render
    if entry.get('failure') is not None:
        result['failed_image'] = True
        return result

    assert entry.get('detections') is not None

    highest_conf = get_max_conf(entry)
    result['max_conf'] = highest_conf

    below_threshold = highest_conf < confidence_threshold
    if below_threshold and render_detections_only:
        result['skipped_image'] = True
        return result

    # Resolve the path of the source image
    if images_dir is not None:
        assert not path_is_abs(image_id), \
            'Relative paths are required when an image base dir is supplied'
        source_path = os.path.join(images_dir, image_id)
    else:
        source_path = image_id
        assert path_is_abs(source_path), \
            'Absolute paths are required when no image base dir is supplied'

    if not os.path.exists(source_path):
        print('Image {} not found'.format(image_id))
        result['missing_image'] = True
        return result

    result['image_filename_in_abs'] = source_path

    # Load the image
    try:
        image = vis_utils.open_image(source_path)
    except Exception as e:
        print('Warning: error opening {}: {}'.format(source_path,str(e)))
        result['open_failure'] = True
        return result

    try:

        # Translate the category names we're supposed to blur into category IDs
        names_to_blur = category_names_to_blur
        if isinstance(names_to_blur,str):
            names_to_blur = [names_to_blur]

        if names_to_blur is None:
            blur_ids = []
        else:
            blur_ids = [cid for cid in detector_label_map
                        if detector_label_map[cid] in names_to_blur]

        # Blur happens on the full-size image, before resizing and box rendering
        to_blur = [det for det in entry['detections']
                   if det['conf'] >= confidence_threshold and det['category'] in blur_ids]
        if to_blur:
            blur_detections(image,to_blur)

        # Resize if necessary
        #
        # If output_image_width is -1 or None, this will just return the original image
        image = vis_utils.resize_image(image, output_image_width)

        vis_utils.render_detection_bounding_boxes(
            entry['detections'], image,
            label_map=detector_label_map,
            classification_label_map=classification_label_map,
            confidence_threshold=confidence_threshold,
            classification_confidence_threshold=classification_confidence_threshold,
            box_sort_order=box_sort_order,
            thickness=box_thickness,
            expansion=box_expansion,
            label_font_size=label_font_size,
            label_font=label_font)

        # Choose the output location
        if preserve_path_structure:
            assert not os.path.isabs(image_id), "Can't preserve paths when operating on absolute paths"
            output_path = os.path.join(out_dir, image_id)
            os.makedirs(os.path.dirname(output_path),exist_ok=True)
        else:
            # Flatten the relative path into a single filename
            flat_name = image_id.replace('/', '~').replace('\\', '~').replace(':', '~')
            output_path = os.path.join(out_dir, f'anno_{flat_name}')

        image.save(output_path)
        result['annotated_image_path'] = output_path

    except Exception as e:

        print('Warning: error rendering {}: {}'.format(source_path,str(e)))
        result['rendering_failure'] = True

    return result

# ...def _render_image(...)


#%% Main function


# [docs]  (documentation-page link marker left over from HTML extraction; not code)
def visualize_detector_output(detector_output_path,
                              out_dir,
                              images_dir=None,
                              confidence_threshold=0.15,
                              sample=-1,
                              output_image_width=1000,
                              random_seed=0,
                              render_detections_only=False,
                              classification_confidence_threshold=0.1,
                              html_output_file=None,
                              html_output_options=None,
                              preserve_path_structure=False,
                              parallelize_rendering=True,
                              parallelize_rendering_n_cores=10,
                              parallelize_rendering_with_threads=True,
                              box_sort_order=default_box_sort_order,
                              category_names_to_blur=None,
                              link_images_to_originals=False,
                              detector_label_map=None,
                              box_thickness=DEFAULT_BOX_THICKNESS,
                              box_expansion=0,
                              label_font_size=DEFAULT_LABEL_FONT_SIZE,
                              label_font='arial.ttf'):
    """
    Draws bounding boxes on images given the output of a detector.

    Args:
        detector_output_path (str): path to detector output .json file, or a loaded MD results
            dict
        out_dir (str): path to directory for saving annotated images; created if it does
            not exist
        images_dir (str, optional): folder where the images live; filenames in
            [detector_output_path] should be relative to [images_dir].  Can be None if paths are
            absolute.
        confidence_threshold (float, optional): threshold above which detections will be rendered;
            if None, a typical threshold is chosen based on the results file
        sample (int, optional): maximum number of images to render, -1 for all
        output_image_width (int, optional): width in pixels to resize images for display,
            preserving aspect ratio; set to -1 to use original image width
        random_seed (int, optional): seed to use for choosing images when sample != -1, use None
            to avoid forcing a seed
        render_detections_only (bool, optional): only render images with above-threshold detections.
            Empty images are discarded after sampling, so if you want to see, e.g., 1000 non-empty
            images, you can set [render_detections_only], but you need to sample more than 1000 images.
        classification_confidence_threshold (float, optional): only show classifications
            above this threshold; does not impact whether images are rendered, only whether
            classification labels (not detection categories) are displayed
        html_output_file (str, optional): output path for an HTML index file (not written
            if None)
        html_output_options (dict, optional): HTML formatting options; see write_html_image_list
            for details.  The most common option you may want to supply here is
            'maxFiguresPerHtmlFile'.
        preserve_path_structure (bool, optional): if False (default), writes images to unique
            names in a flat structure in the output folder; if True, preserves relative paths
            within the output folder
        parallelize_rendering (bool, optional): whether to use concurrent workers for rendering
        parallelize_rendering_n_cores (int, optional): number of concurrent workers to use
            (ignored if parallelize_rendering is False); None uses the pool's default
        parallelize_rendering_with_threads (bool, optional): determines whether we use
            threads (True) or processes (False) for parallelization (ignored if parallelize_rendering
            is False)
        box_sort_order (str, optional): sorting scheme for detection boxes, can be None, "confidence", or
            "reverse_confidence"
        category_names_to_blur (list of str, optional): category names for which we should blur detections,
            most commonly ['person']
        link_images_to_originals (bool, optional): include a link from every rendered image back to
            the corresponding original image
        detector_label_map (dict, optional): mapping from category IDs to labels; by default (None) uses
            the values in the detector file.  If this is the string 'no_detection_labels', hides labels.
        box_thickness (int or float, optional): box thickness in pixels.  If this is a float less than
            1.0, it's treated as a fraction of the image width.
        box_expansion (int or float , optional): box expansion in pixels.  If this is a float less
            than 1.0, it's treated as a fraction of the image width.
        label_font_size (float, optional): label font size in pixels.  If this is a float less
            than 1.0, it's treated as a fraction of the image width.
        label_font (str, optional): font filename to use for label text (default 'arial.ttf')

    Returns:
        list: list of paths to annotated images
    """

    if isinstance(detector_output_path,str):
        assert os.path.exists(detector_output_path), \
            'Detector output file does not exist at {}'.format(detector_output_path)
    else:
        assert isinstance(detector_output_path,dict), \
            'detector_output_path is neither a filename nor a results dict'

    if images_dir is not None:
        assert os.path.isdir(images_dir), \
            'Image folder {} is not available'.format(images_dir)

    os.makedirs(out_dir, exist_ok=True)


    ##%% Load detector output

    if isinstance(detector_output_path,dict):
        detector_output = detector_output_path
    else:
        detector_output = load_md_or_speciesnet_file(detector_output_path)

    images = detector_output['images']

    if confidence_threshold is None:
        confidence_threshold = get_typical_confidence_threshold_from_results(detector_output)

    # Both endpoints are accepted
    assert confidence_threshold >= 0 and confidence_threshold <= 1, \
        f'Confidence threshold {confidence_threshold} is invalid, must be in [0, 1].'

    # Resolve the detection label map: explicit "hide labels" string, caller-supplied
    # dict, the mapping stored in the results, or the built-in default (in that order)
    if isinstance(detector_label_map,str):
        assert detector_label_map == 'no_detection_labels', \
            'Unrecognized detection label string {}'.format(detector_label_map)
        detector_label_map = None
    elif detector_label_map is not None:
        assert isinstance(detector_label_map,dict), \
            'Invalid detector label maps'
    elif 'detection_categories' in detector_output:
        detector_label_map = detector_output['detection_categories']
    else:
        detector_label_map = DEFAULT_DETECTOR_LABEL_MAP

    num_images = len(images)
    print(f'Detector output file contains {num_images} entries.')

    if (sample is not None) and (sample > 0) and (num_images > sample):

        if random_seed is not None:
            # Sort before seeding so a given seed selects the same images regardless
            # of the order of entries in the results; sorted() also gives us a new list
            images = sorted(images, key=lambda x: x['file'])
            random.seed(random_seed)
        else:
            # random.shuffle() works in place; copy first so we never reorder the
            # 'images' list inside the caller's results dict
            images = list(images)

        random.shuffle(images)
        images = sorted(images[:sample], key=lambda x: x['file'])
        print(f'Sampled {len(images)} entries from the detector output file.')


    ##%% Load images, annotate them and save

    print('Rendering detections above a confidence threshold of {}'.format(
        confidence_threshold))

    classification_label_map = None

    if 'classification_categories' in detector_output:
        classification_label_map = detector_output['classification_categories']

    rendering_results = []

    if parallelize_rendering:

        if parallelize_rendering_with_threads:
            worker_string = 'threads'
            pool_class = ThreadPool
        else:
            worker_string = 'processes'
            pool_class = Pool

        pool = None
        try:
            # A worker count of None lets the pool choose its own default size
            pool = pool_class(parallelize_rendering_n_cores)
            if parallelize_rendering_n_cores is not None:
                print('Rendering images with {} {}'.format(parallelize_rendering_n_cores,
                                                           worker_string))
            rendering_results = list(tqdm(pool.imap(
                                     partial(_render_image,detector_label_map=detector_label_map,
                                             classification_label_map=classification_label_map,
                                             confidence_threshold=confidence_threshold,
                                             classification_confidence_threshold=classification_confidence_threshold,
                                             render_detections_only=render_detections_only,
                                             preserve_path_structure=preserve_path_structure,
                                             out_dir=out_dir,
                                             images_dir=images_dir,
                                             output_image_width=output_image_width,
                                             box_sort_order=box_sort_order,
                                             category_names_to_blur=category_names_to_blur,
                                             box_thickness=box_thickness,
                                             box_expansion=box_expansion,
                                             label_font_size=label_font_size,
                                             label_font=label_font),
                                     images), total=len(images)))
        finally:
            if pool is not None:
                pool.close()
                pool.join()
                print('Pool closed and joined for detector output visualization')

    else:

        for entry in tqdm(images):

            rendering_result = _render_image(entry,
                                             detector_label_map,
                                             classification_label_map,
                                             confidence_threshold,
                                             classification_confidence_threshold,
                                             render_detections_only,
                                             preserve_path_structure,
                                             out_dir,
                                             images_dir,
                                             output_image_width,
                                             box_sort_order,
                                             category_names_to_blur=category_names_to_blur,
                                             box_thickness=box_thickness,
                                             box_expansion=box_expansion,
                                             label_font_size=label_font_size,
                                             label_font=label_font)
            rendering_results.append(rendering_result)

    # ...for each image

    failed_images = [r for r in rendering_results if r['failed_image']]
    missing_images = [r for r in rendering_results if r['missing_image']]
    skipped_images = [r for r in rendering_results if r['skipped_image']]
    open_failures = [r for r in rendering_results if r['open_failure']]
    rendering_failures = [r for r in rendering_results if r['rendering_failure']]

    print('Skipped {} failed images (of {})'.format(len(failed_images),len(images)))
    print('Skipped {} missing images (of {})'.format(len(missing_images),len(images)))
    print('Skipped {} below-threshold images (of {})'.format(len(skipped_images),len(images)))

    # Images that exist but could not be opened or rendered also produce no output
    # file, so include them in the summary
    print('Skipped {} images that could not be opened (of {})'.format(
        len(open_failures),len(images)))
    print('Skipped {} images that could not be rendered (of {})'.format(
        len(rendering_failures),len(images)))

    print(f'Rendered detection results to {out_dir}')

    annotated_image_paths = [r['annotated_image_path'] for r in rendering_results if \
                             r['annotated_image_path'] is not None]

    if html_output_file is not None:

        # Image links in the HTML file are written relative to the HTML file's folder
        html_dir = os.path.dirname(html_output_file)

        html_image_info = []

        for r in rendering_results:
            d = {}
            if r['annotated_image_path'] is None:
                # Every image without an output file should have a recorded reason
                assert r['failed_image'] or r['missing_image'] or r['skipped_image'] or \
                    r['open_failure'] or r['rendering_failure']
                continue
            annotated_image_path_relative = os.path.relpath(r['annotated_image_path'],html_dir)
            d['filename'] = annotated_image_path_relative
            # For sorting
            d['filename_lower'] = annotated_image_path_relative.lower()
            d['imageStyle'] = 'max-width:95%;'
            d['textStyle'] = \
             'font-family:verdana,arial,calibri;font-size:80%;' + \
                 'text-align:left;margin-top:20;margin-bottom:5'
            d['title'] = '{} (max conf: {})'.format(r['file'],r['max_conf'])
            if link_images_to_originals:
                d['linkTarget'] = r['image_filename_in_abs']
            html_image_info.append(d)

        html_image_info = sort_list_of_dicts_by_key(html_image_info,'filename_lower')
        _ = write_html_image_list.write_html_image_list(html_output_file,
                                                        html_image_info,
                                                        options=html_output_options)

    # ...if we're supposed to write HTML info

    return annotated_image_paths


# ...def visualize_detector_output(...)


#%% Command-line driver

def main(): # noqa
    """
    Command-line driver: parses arguments, calls visualize_detector_output(), and
    optionally opens the resulting HTML index file.

    Prints help and exits if no arguments are supplied.
    """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Annotate the bounding boxes predicted by a detector above '
                    'some confidence threshold, and save the annotated images.')
    parser.add_argument(
        'detector_output_path', type=str,
        help='Path to json output file of the detector')
    parser.add_argument(
        'out_dir', type=str,
        help='Path to directory where the annotated images will be saved. '
             'The directory will be created if it does not exist.')
    parser.add_argument(
        '--confidence', type=float, default=0.15,
        help='Value between 0 and 1, indicating the confidence threshold '
             'above which to visualize bounding boxes')
    parser.add_argument(
        '--images_dir', type=str, default=None,
        help='Path to a local directory where images are stored. This '
             'serves as the root directory for image paths in '
             'detector_output_path.  Omit if image paths are absolute.')
    parser.add_argument(
        '--sample', type=int, default=-1,
        help='Number of images to be annotated and rendered. Set to -1 '
             '(default) to annotate all images in the detector output file. '
             'There may be fewer images if some are not found in images_dir.')
    parser.add_argument(
        '--output_image_width', type=int, default=1000,
        help='Integer, desired width in pixels of the output annotated images. '
             'Use -1 to not resize. Default: 1000.')
    parser.add_argument(
        '--random_seed', type=int, default=None,
        help='Integer, for deterministic order of image sampling')
    parser.add_argument(
        '--html_output_file', type=str, default=None,
        help='Filename to which we should write an HTML image index (off by default)')
    parser.add_argument(
        '--open_html_output_file', action='store_true',
        help='Open the .html output file when done')
    parser.add_argument(
        '--detections_only', action='store_true',
        help='Only render images with above-threshold detections (by default, '
             'both empty and non-empty images are rendered).')
    parser.add_argument(
        '--preserve_path_structure', action='store_true',
        help='Preserve relative image paths (otherwise flattens and assigns unique file names)')
    parser.add_argument(
        '--category_names_to_blur', default=None, type=str,
        help='Comma-separated list of category names to blur (or a single category name, typically "person")')
    parser.add_argument(
        '--classification_confidence', type=float, default=0.3,
        help='If classification results are present, render results above this threshold')
    parser.add_argument(
        '--box_thickness', type=float, default=DEFAULT_BOX_THICKNESS,
        help='Line thickness in pixels for box rendering.  If this is less than 1.0, '
             'it is treated as a fraction of the image width.')
    parser.add_argument(
        '--box_expansion', type=float, default=0,
        help='Number of pixels to expand bounding boxes on each side.  If this is less than 1.0, '
             'it is treated as a fraction of the image width.')
    parser.add_argument(
        '--label_font_size', type=float, default=DEFAULT_LABEL_FONT_SIZE,
        help='Font size in pixels for detection labels.  If this is less than 1.0, '
             'it is treated as a fraction of the image width.')
    parser.add_argument(
        '--label_font', type=str, default='arial.ttf',
        help='Font filename to use for label text (default arial.ttf).')

    # With no arguments at all, show usage rather than an argparse error
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    category_names_to_blur = args.category_names_to_blur
    if category_names_to_blur is not None:
        # Tolerate whitespace around commas (e.g. "person, vehicle") and stray commas;
        # an un-stripped name like ' vehicle' would never match a category name
        category_names_to_blur = \
            [s.strip() for s in category_names_to_blur.split(',') if s.strip()]

    visualize_detector_output(
        detector_output_path=args.detector_output_path,
        out_dir=args.out_dir,
        confidence_threshold=args.confidence,
        images_dir=args.images_dir,
        sample=args.sample,
        output_image_width=args.output_image_width,
        random_seed=args.random_seed,
        render_detections_only=args.detections_only,
        classification_confidence_threshold=args.classification_confidence,
        preserve_path_structure=args.preserve_path_structure,
        html_output_file=args.html_output_file,
        category_names_to_blur=category_names_to_blur,
        box_thickness=args.box_thickness,
        box_expansion=args.box_expansion,
        label_font_size=args.label_font_size,
        label_font=args.label_font)

    if (args.html_output_file is not None) and args.open_html_output_file:
        print('Opening output file {}'.format(args.html_output_file))
        open_file(args.html_output_file)

# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':
    main()


#%% Interactive driver

# Scratch cell for running visualize_detector_output() interactively; the
# "if False" guard keeps it from ever executing on import or from the command line.
if False:

    pass

    #%%

    # Inputs and outputs
    detector_output_path = os.path.expanduser('detections.json')
    out_dir = r'g:\temp\preview'
    images_dir = r'g:\camera_traps\camera_trap_images'
    html_output_file = os.path.join(out_dir,'index.html')
    html_output_options = None

    # Sampling and rendering options
    confidence_threshold = 0.15
    classification_confidence_threshold = 0.1
    sample = 50
    random_seed = 1
    output_image_width = 1000
    render_detections_only = True
    preserve_path_structure = False

    # Parallelization options
    parallelize_rendering = True
    parallelize_rendering_n_cores = 10
    parallelize_rendering_with_threads = False

    _ = visualize_detector_output(
        detector_output_path=detector_output_path,
        out_dir=out_dir,
        images_dir=images_dir,
        confidence_threshold=confidence_threshold,
        sample=sample,
        output_image_width=output_image_width,
        random_seed=random_seed,
        render_detections_only=render_detections_only,
        classification_confidence_threshold=classification_confidence_threshold,
        html_output_file=html_output_file,
        html_output_options=html_output_options,
        preserve_path_structure=preserve_path_structure,
        parallelize_rendering=parallelize_rendering,
        parallelize_rendering_n_cores=parallelize_rendering_n_cores,
        parallelize_rendering_with_threads=parallelize_rendering_with_threads)

    from megadetector.utils.path_utils import open_file
    open_file(html_output_file)