"""
combine_coco_camera_traps_files.py
Merges two or more .json files in COCO Camera Traps format, optionally
writing the results to another .json file.
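- Concatenates image lists, erroring if image filenames are not unique (the
  Python API can relax this via require_uniqueness=False).
- Errors on unrecognized fields.
- Keeps the first file's info struct, and stores every input's info struct in
  a list under 'original_info'.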
*Example command-line invocation*
combine_coco_camera_traps_files input1.json input2.json ... inputN.json output.json
"""
#%% Constants and imports
import os
import sys
import json
import argparse
from megadetector.utils import ct_utils
#%% Merge functions
[docs]
def combine_cct_files(input_files,
                      output_file=None,
                      require_uniqueness=True,
                      filename_prefixes=None):
    """
    Merges the list of COCO Camera Traps files [input_files] into a single
    dictionary, optionally writing the result to [output_file].

    Args:
        input_files (list): paths to CCT .json files
        output_file (str, optional): path to write merged .json file; if None,
            nothing is written and the merged dict is only returned
        require_uniqueness (bool, optional): forwarded to combine_cct_dictionaries,
            which uses it to decide whether a repeated image filename is an error
        filename_prefixes (dict, optional): dict mapping input filenames to strings
            that should be prepended to image filenames from that source; if
            supplied, it must contain exactly one entry per file in [input_files]

    Returns:
        dict: the merged COCO-formatted .json dict
    """

    # Argument validation
    for fn in input_files:
        assert os.path.isfile(fn), 'Could not find file {}'.format(fn)

    if filename_prefixes is not None:
        assert isinstance(filename_prefixes,dict), 'filename_prefixes must be a dict'
        assert len(filename_prefixes) == len(input_files), 'Prefix dict mismatch'
        # Check every mapping before loading anything, so a bad prefix dict
        # fails immediately rather than after parsing large .json files
        for fn in input_files:
            assert fn in filename_prefixes, \
                'No prefix mapping for {}'.format(fn)

    input_dicts = []

    print('Loading input files')
    for fn in input_files:
        with open(fn, 'r', encoding='utf-8') as f:
            d = json.load(f)
        if filename_prefixes is not None:
            # Stash the prefix in the dict itself; combine_cct_dictionaries
            # recognizes the 'filename_prefix' field
            d['filename_prefix'] = filename_prefixes[fn]
        input_dicts.append(d)

    print('Merging results')
    merged_dict = combine_cct_dictionaries(
        input_dicts, require_uniqueness=require_uniqueness)

    # Only announce the write when a write actually happens
    if output_file is not None:
        print('Writing output')
        ct_utils.write_json(output_file, merged_dict)

    return merged_dict
[docs]
def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
    """
    Merges the list of COCO Camera Traps dictionaries [input_dicts].

    Merge rules:

    - Categories are unified by name.  'empty' is always category 0; every other
      category name gets the next unused integer ID in order of first appearance.
    - Image IDs, annotation IDs, sequence IDs, and locations are prefixed with
      'dsNNN_' (NNN = zero-padded index of the source dict) so they cannot collide
      across sources.
    - If a dict has a 'filename_prefix' field, it is prepended to that dict's image
      filenames.
    - The merged 'info' is a deep copy of the first dict's info, plus an
      'original_info' list holding every input's info struct.
    - The input dicts are not modified; images and annotations are shallow-copied
      before being rewritten.

    Args:
        input_dicts (list of dict): list of CCT dicts
        require_uniqueness (bool, optional): whether a repeated image filename
            (after prefixing) is an error.  If False, a repeated filename is
            reported, the most recent image record replaces the earlier one, and
            annotations that referred to the replaced record are re-pointed at the
            surviving one.

    Returns:
        dict: the merged COCO-formatted .json dict, with keys 'info', 'categories',
        'images' (sorted by filename), and 'annotations'

    Raises:
        ValueError: if an input dict contains an unrecognized top-level field
        AssertionError: on duplicate images (when require_uniqueness is True),
            inconsistent category IDs, or annotations with unknown categories
    """

    import copy

    filename_to_image = {}

    # Merged image ID --> filename; only populated when redundant images are
    # allowed, where it is used to re-point annotations at surviving images
    image_id_to_filename = {}
    found_redundant_images = False

    all_annotations = []
    info = None

    category_name_to_id = {'empty': 0}
    next_category_id = 1

    known_fields = ['info', 'categories', 'annotations','images','filename_prefix']

    # i_input_dict = 0; input_dict = input_dicts[i_input_dict]
    for i_input_dict,input_dict in enumerate(input_dicts):

        filename_prefix = input_dict.get('filename_prefix', '')

        for k in input_dict.keys():
            if k not in known_fields:
                raise ValueError(f'Unrecognized CCT field: {k}')

        # We will prepend an index to every ID to guarantee uniqueness
        index_string = 'ds' + str(i_input_dict).zfill(3) + '_'

        old_cat_id_to_new_cat_id = {}

        # Map detection categories from the original data set into the merged data set
        for original_category in input_dict['categories']:

            original_cat_id = original_category['id']
            cat_name = original_category['name']

            if cat_name in category_name_to_id:
                new_cat_id = category_name_to_id[cat_name]
            else:
                new_cat_id = next_category_id
                next_category_id += 1
                category_name_to_id[cat_name] = new_cat_id

            # A source category ID that appears twice has to resolve to the
            # same merged category both times
            if original_cat_id in old_cat_id_to_new_cat_id:
                assert old_cat_id_to_new_cat_id[original_cat_id] == new_cat_id
            else:
                old_cat_id_to_new_cat_id[original_cat_id] = new_cat_id

        # ...for each category

        # Merge original image list into the merged data set
        for original_im in input_dict['images']:

            # Work on a shallow copy so the caller's dict is left untouched
            im = dict(original_im)

            if 'seq_id' in im:
                im['seq_id'] = index_string + str(im['seq_id'])
            if 'location' in im:
                # str() so that non-string (e.g. integer) locations are handled
                # the same way as sequence IDs
                im['location'] = index_string + str(im['location'])

            im_file = filename_prefix + im['file_name']
            im['file_name'] = im_file

            if require_uniqueness:
                assert im_file not in filename_to_image, f'Duplicate image: {im_file}'
            elif im_file in filename_to_image:
                print('Redundant image {}'.format(im_file))
                found_redundant_images = True

            # Create a unique ID
            im['id'] = index_string + str(im['id'])
            filename_to_image[im_file] = im

            if not require_uniqueness:
                image_id_to_filename[im['id']] = im_file

        # ...for each image

        # Same for annotations
        for original_ann in input_dict['annotations']:

            ann = dict(original_ann)
            ann['image_id'] = index_string + str(ann['image_id'])
            ann['id'] = index_string + str(ann['id'])
            assert ann['category_id'] in old_cat_id_to_new_cat_id
            ann['category_id'] = old_cat_id_to_new_cat_id[ann['category_id']]
            all_annotations.append(ann)

        # ...for each annotation

        # Merge info dicts, don't check completion time fields
        if info is None:
            info = copy.deepcopy(input_dict['info'])
            info['original_info'] = [input_dict['info']]
        else:
            info['original_info'].append(input_dict['info'])

    # ...for each dictionary

    # When a redundant image replaced an earlier record, annotations written for
    # the earlier record still carry its ID; re-point them at the record that
    # survived for the same filename so no annotation references a missing image.
    if found_redundant_images:
        for ann in all_annotations:
            im_file = image_id_to_filename.get(ann['image_id'])
            if im_file is not None:
                ann['image_id'] = filename_to_image[im_file]['id']

    # Convert merged image dictionaries to a sorted list
    sorted_images = sorted(filename_to_image.values(), key=lambda im: im['file_name'])

    all_categories = [{'id':cat_id,'name':cat_name} for
                      cat_name,cat_id in category_name_to_id.items()]

    merged_dict = {'info': info,
                   'categories': all_categories,
                   'images': sorted_images,
                   'annotations': all_annotations}

    return merged_dict
# ...combine_cct_dictionaries(...)
#%% Command-line driver
def main(): # noqa
    """
    Command-line entry point.

    Takes one or more input .json paths followed by a single output .json path
    and passes them to combine_cct_files.  When invoked with no arguments at all,
    prints the usage text and exits.
    """

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('input_paths', nargs='+', help='List of input .json files')
    arg_parser.add_argument('output_path', help='Output .json file')

    # A bare invocation gets the help text rather than an argparse error
    cli_args = sys.argv[1:]
    if not cli_args:
        arg_parser.print_help()
        arg_parser.exit()

    parsed = arg_parser.parse_args()
    combine_cct_files(parsed.input_paths, parsed.output_path)
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':
    main()