# Source code for megadetector.data_management.cct_to_wi

"""

cct_to_wi.py

Converts COCO Camera Traps .json files to the Wildlife Insights
batch upload format.

**This is very much just a demo script; all the relevant constants are hard-coded
at the top of main().**

That caveat aside, it works.  Before running it, set up all the paths in the "Paths" cell
at the top of main().

Also see:

* https://github.com/ConservationInternational/Wildlife-Insights----Data-Migration
* https://data.naturalsciences.org/wildlife-insights/taxonomy/search

"""

#%% Imports

import csv
import json
import os
from collections import defaultdict

import pandas as pd


#%% Main wrapper

def main(input_file=None, taxonomy_file=None, templates_dir=None, output_base=None): # noqa
    """
    Converts a COCO Camera Traps .json file to the Wildlife Insights batch upload
    format, writing one project, camera, deployment, and image .csv file to
    [output_base].

    The project, camera, and deployment metadata are hard-coded in the cells below.
    Any path that is not supplied falls back to the demo value in the "Paths" cell,
    so calling main() with no arguments behaves as a pure demo script.

    Args:
        input_file (str, optional): COCO Camera Traps .json file describing the dataset
        taxonomy_file (str, optional): .json file containing a 'url_base' string and a
            'taxonomy' dict mapping each category name in [input_file] to a dict with the
            seven WI taxonomy fields (common_name, wi_taxon_id, class, order, family,
            genus, species)
        templates_dir (str, optional): folder containing the four WI .csv template files
        output_base (str, optional): folder to which WI-formatted .csv files are written;
            created if it does not exist

    Raises:
        AssertionError: if an input path is missing, a template cannot be parsed, the
            hard-coded metadata does not match a template, or an image does not have
            exactly one annotation
    """

    #%% Paths

    # A COCO camera traps file with information about this dataset
    if input_file is None:
        input_file = r'c:\temp\camera_trap_images_no_people\bellevue_camera_traps.2020-12-26.json'

    # A .json dictionary mapping common names in this dataset to dictionaries with the
    # WI taxonomy fields: common_name, wi_taxon_id, class, order, family, genus, species
    if taxonomy_file is None:
        taxonomy_file = r'c:\temp\camera_trap_images_no_people\bellevue_camera_traps_to_wi.json'

    # The folder where the .csv template files live
    if templates_dir is None:
        templates_dir = r'c:\temp\wi_batch_upload_templates'

    # The folder to which you want to write WI-formatted .csv files
    if output_base is None:
        output_base = r'c:\temp\wi_output'


    #%% Path validation

    assert os.path.isfile(input_file), \
        'Input file {} not found'.format(input_file)
    assert os.path.isfile(taxonomy_file), \
        'Taxonomy file {} not found'.format(taxonomy_file)
    assert os.path.isdir(templates_dir), \
        'Template folder {} not found'.format(templates_dir)
    os.makedirs(output_base,exist_ok=True)


    #%% Constants

    projects_file_name = 'Template Wildlife Insights Batch Upload - Projectv1.0.csv'
    deployments_file_name = 'Template Wildlife Insights Batch Upload - Deploymentv1.0.csv'
    images_file_name = 'Template Wildlife Insights Batch Upload - Imagev1.0.csv'
    cameras_file_name = 'Template Wildlife Insights Batch Upload - Camerav1.0.csv'

    for fn in [projects_file_name,deployments_file_name,images_file_name,cameras_file_name]:
        assert os.path.isfile(os.path.join(templates_dir,fn)), \
            'Template file {} not found in {}'.format(fn,templates_dir)


    #%% Project information

    project_info = {}
    project_info['project_name'] = 'Bellevue Camera Traps'
    project_info['project_id'] = 'bct_001'
    project_info['project_short_name'] = 'BCT'
    project_info['project_objectives'] = 'none'
    project_info['project_species'] = 'Multiple'
    project_info['project_species_individual'] = ''
    project_info['project_sensor_layout'] = 'Convenience'
    project_info['project_sensor_layout_targeted_type'] = ''
    project_info['project_bait_use'] = 'No'
    project_info['project_bait_type'] = 'None'
    project_info['project_stratification'] = 'No'
    project_info['project_stratification_type'] = ''
    project_info['project_sensor_method'] = 'Sensor Detection'
    project_info['project_individual_animals'] = 'No'
    project_info['project_admin'] = 'Dan Morris'
    project_info['project_admin_email'] = 'cameratraps@lila.science'
    project_info['country_code'] = 'USA'
    project_info['embargo'] = str(0)
    project_info['initiative_id'] = ''
    project_info['metadata_license'] = 'CC0'
    project_info['image_license'] = 'CC0'
    project_info['project_blank_images'] = 'No'
    project_info['project_sensor_cluster'] = 'No'

    camera_info = {}
    camera_info['project_id'] = project_info['project_id']
    camera_info['camera_id'] = '0000'
    camera_info['make'] = ''
    camera_info['model'] = ''
    camera_info['serial_number'] = ''
    camera_info['year_purchased'] = ''

    deployment_info = {}
    deployment_info['project_id'] = project_info['project_id']
    deployment_info['deployment_id'] = 'test_deployment'
    deployment_info['subproject_name'] = 'test_subproject'
    deployment_info['subproject_design'] = ''
    deployment_info['placename'] = 'yard'
    # Bellevue, WA is at roughly 47.61 N, 122.20 W.  Latitude has to fall in [-90,90],
    # so the negative value can only be the longitude.
    deployment_info['longitude'] = '-122.2015'
    deployment_info['latitude'] = '47.6101'
    deployment_info['start_date'] = '2016-01-01 00:00:00'
    deployment_info['end_date'] = '2026-01-01 00:00:00'
    deployment_info['event_name'] = ''
    deployment_info['event_description'] = ''
    deployment_info['event_type'] = ''
    deployment_info['bait_type'] = ''
    deployment_info['bait_description'] = ''
    deployment_info['feature_type'] = 'None'
    deployment_info['feature_type_methodology'] = ''
    deployment_info['camera_id'] = camera_info['camera_id']
    deployment_info['quiet_period'] = str(60)
    deployment_info['camera_functioning'] = 'Camera Functioning'
    deployment_info['sensor_height'] = 'Chest height'
    deployment_info['height_other'] = ''
    deployment_info['sensor_orientation'] = 'Parallel'
    deployment_info['orientation_other'] = ''
    deployment_info['recorded_by'] = 'Dan Morris'

    image_info = {}
    image_info['identified_by'] = 'Dan Morris'


    #%% Read templates

    def parse_fields(templates_dir,file_name):
        """
        Reads the column names from a template .csv file, which is expected to
        contain exactly one non-empty row (the header).
        """

        # Use the csv module (rather than splitting on commas) so quoted header
        # fields are handled correctly
        with open(os.path.join(templates_dir,file_name),'r',newline='') as f:
            rows = [row for row in csv.reader(f) if any(len(cell.strip()) > 0 for cell in row)]
        assert len(rows) == 1, 'Error processing template {}'.format(file_name)
        fields = [cell.strip() for cell in rows[0]]
        print('Parsed {} columns from {}'.format(len(fields),file_name))
        return fields

    projects_fields = parse_fields(templates_dir,projects_file_name)
    deployments_fields = parse_fields(templates_dir,deployments_file_name)
    images_fields = parse_fields(templates_dir,images_file_name)
    cameras_fields = parse_fields(templates_dir,cameras_file_name)


    #%% Compare dictionary to template lists

    def compare_info_to_template(info,template_fields,name):
        """
        Verifies that the keys in [info] are exactly the fields in [template_fields].
        """

        for s in info.keys():
            assert s in template_fields,'Field {} not specified in {}_fields'.format(s,name)
        for s in template_fields:
            assert s in info.keys(),'Field {} not specified in {}_info'.format(s,name)


    def write_table(file_name,info,template_fields):
        """
        Writes a header row and a single row of values from [info] to
        [output_base]/[file_name], with columns in template order.
        """

        assert len(info) == len(template_fields)

        output_file = os.path.join(output_base,file_name)

        # The csv module takes care of quoting values that contain commas or quotes.
        # newline='' is what the csv module expects; using os.linesep as the terminator
        # keeps platform-native line endings.
        with open(output_file,'w',newline='') as f:
            writer = csv.writer(f,lineterminator=os.linesep)
            writer.writerow(template_fields)
            writer.writerow([info[s] for s in template_fields])


    #%% Project file

    compare_info_to_template(project_info,projects_fields,'project')
    write_table(projects_file_name,project_info,projects_fields)


    #%% Camera file

    compare_info_to_template(camera_info,cameras_fields,'camera')
    write_table(cameras_file_name,camera_info,cameras_fields)


    #%% Deployment file

    compare_info_to_template(deployment_info,deployments_fields,'deployment')
    write_table(deployments_file_name,deployment_info,deployments_fields)


    #%% Images file

    # Read .json file with image information
    with open(input_file,'r') as f:
        input_data = json.load(f)

    # Read taxonomy dictionary
    with open(taxonomy_file,'r') as f:
        taxonomy_mapping = json.load(f)

    url_base = taxonomy_mapping['url_base']
    taxonomy_mapping = taxonomy_mapping['taxonomy']

    # Map each image ID to the category names of its annotations
    category_id_to_name = {cat['id']:cat['name'] for cat in input_data['categories']}

    image_id_to_annotations = defaultdict(list)

    # annotation = input_data['annotations'][0]
    for annotation in input_data['annotations']:
        image_id_to_annotations[annotation['image_id']].append(
            category_id_to_name[annotation['category_id']])

    # Built once so the per-image field check is cheap
    images_field_set = set(images_fields)

    rows = []

    # im = input_data['images'][0]
    for im in input_data['images']:

        row = {}

        url = url_base + im['file_name'].replace('\\','/')
        row['project_id'] = project_info['project_id']
        row['deployment_id'] = deployment_info['deployment_id']
        row['image_id'] = im['id']
        row['location'] = url
        row['identified_by'] = image_info['identified_by']

        # This script only supports images with exactly one annotation
        category_names = image_id_to_annotations[im['id']]
        assert len(category_names) == 1, \
            'Image {} has {} annotations, expected exactly one'.format(
                im['id'],len(category_names))
        category_name = category_names[0]

        taxon_info = taxonomy_mapping[category_name]
        assert len(taxon_info.keys()) == 7
        row.update(taxon_info)

        # We don't have counts, but we can differentiate between zero and 1
        if category_name == 'empty':
            row['number_of_objects'] = 0
        else:
            row['number_of_objects'] = 1

        assert isinstance(im['datetime'],str)
        row['uncertainty'] = None
        row['timestamp'] = im['datetime']
        row['highlighted'] = 0
        row['age'] = None
        row['sex'] = None
        row['animal_recognizable'] = 'No'
        row['individual_id'] = None
        row['individual_animal_notes'] = None
        row['markings'] = None

        # Check field names, not just the field count, so a misnamed taxonomy key
        # can't slip through
        assert set(row) == images_field_set, \
            'Image fields do not match the image template, mismatched fields: {}'.format(
                sorted(set(row) ^ images_field_set))

        rows.append(row)

    # ...for each image

    # Passing [columns] writes the columns in template order rather than in the order
    # the keys were inserted above
    df = pd.DataFrame(rows,columns=images_fields)
    df.to_csv(os.path.join(output_base,images_file_name),index=False)
# ...main()


#%% Command-line driver

# Run the conversion only when this file is executed as a script, not on import
if __name__ == '__main__':
    main()