# Source code for megadetector.data_management.animl_to_md

"""

animl_to_md.py

Convert a .csv file produced by the Animl package:

https://github.com/conservationtechlab/animl-py

...to a MD results file suitable for import into Timelapse.

Columns are expected to be:

file
category (MD category identifiers: 1==animal, 2==person, 3==vehicle)
detection_conf
bbox1,bbox2,bbox3,bbox4
class
classification_conf

"""

#%% Imports and constants

import sys
import argparse

import pandas as pd

from megadetector.utils.ct_utils import write_json
from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP
detection_category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP


#%% Main function

def animl_results_to_md_results(input_file,output_file=None):
    """
    Converts the Animl-formatted .csv file [input_file] to the MD-formatted .json
    file [output_file].

    Each row of the .csv file is treated as one detection.  Rows that share the same
    'file' value are grouped into a single image entry, in order of first appearance.
    Classification category IDs are assigned in the order in which class names are
    first encountered, starting at '0'.

    Rows whose 'class' value is missing (read by pandas as NaN) produce a detection
    with no 'classifications' field, rather than a classification category whose
    name is NaN.

    Args:
        input_file (str): the Animl .csv file to read
        output_file (str, optional): the .json file to write; if this is None,
            '.json' will be appended to [input_file]

    Raises:
        AssertionError: if an expected column is missing, if a row's category is
            missing or not integer-valued, or if a row's category is not a key
            in the detector label map
    """

    if output_file is None:
        output_file = input_file + '.json'

    df = pd.read_csv(input_file)

    expected_columns = ('file','category','detection_conf',
                        'bbox1','bbox2','bbox3','bbox4','class','classification_conf')

    for s in expected_columns:
        assert s in df.columns,\
            'Expected column {} not found'.format(s)

    classification_category_name_to_id = {}
    filename_to_results = {}

    # i_row = 0; row = df.iloc[i_row]
    for i_row,row in df.iterrows():

        # Is this the first detection we've seen for this file?
        filename = row['file']
        im = filename_to_results.get(filename)
        if im is None:
            im = {}
            im['detections'] = []
            im['file'] = filename
            filename_to_results[filename] = im

        # Pandas often reads integer columns as float64, so check integer-ness
        # rather than just isinstance(..., int)
        assert pd.notna(row['category']) and float(row['category']).is_integer(), \
            'Invalid category identifier in row {} (file: {})'.format(i_row, im['file'])
        detection_category_id = str(int(row['category']))
        assert detection_category_id in detection_category_id_to_name,\
            'Unrecognized detection category ID {}'.format(detection_category_id)

        detection = {}
        detection['category'] = detection_category_id
        detection['conf'] = row['detection_conf']
        detection['bbox'] = [row['bbox1'],row['bbox2'],row['bbox3'],row['bbox4']]

        classification_category_name = row['class']

        # An empty 'class' cell comes back from pandas as NaN.  NaN is not a meaningful
        # category name, and since NaN != NaN it can't be relied on as a dictionary
        # key, so we only attach a classification when a class name is present.
        if not pd.isna(classification_category_name):

            # Have we seen this classification category before?
            if classification_category_name not in classification_category_name_to_id:
                classification_category_name_to_id[classification_category_name] = \
                    str(len(classification_category_name_to_id))

            classification_category_id = \
                classification_category_name_to_id[classification_category_name]

            detection['classifications'] = \
                [[classification_category_id,row['classification_conf']]]

        im['detections'].append(detection)

    # ...for each row

    info = {}
    info['format_version'] = '1.3'
    info['detector'] = 'Animl'
    info['classifier'] = 'Animl'

    results = {}
    results['info'] = info
    results['detection_categories'] = detection_category_id_to_name

    # The MD format maps category ID to name, i.e. the inverse of our lookup table
    results['classification_categories'] = \
        {v: k for k, v in classification_category_name_to_id.items()}
    results['images'] = list(filename_to_results.values())

    write_json(output_file,results)
# ...animl_results_to_md_results(...)


#%% Interactive driver

if False:

    pass

    #%%

    input_file = r"G:\temp\animl-runs\animl-runs\Coati_v2\manifest.csv"
    output_file = None
    animl_results_to_md_results(input_file,output_file)


#%% Command-line driver

def main():
    """
    Parses the command line and converts the specified Animl .csv file to an
    MD-formatted .json file.  If no arguments are supplied, prints the help
    text and exits.
    """

    description = \
        'Convert an Animl-formatted .csv results file to MD-formatted .json results file'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('input_file', type=str, help='input .csv file')
    parser.add_argument('--output_file', type=str, default=None,
                        help='output .json file (defaults to input file appended with ".json")')

    # Nothing on the command line: show the help text and stop
    cli_args = sys.argv[1:]
    if not cli_args:
        parser.print_help()
        parser.exit()

    options = parser.parse_args()

    animl_results_to_md_results(options.input_file,options.output_file)


if __name__ == '__main__':
    main()