"""
cct_to_wi.py
Converts COCO Camera Traps .json files to the Wildlife Insights
batch upload format.
**This is very much just a demo script; all the relevant constants are hard-coded
at the top of main().**
But given that caveat, it works. You need to set up all the paths in the "paths" cell
at the top of main().
Also see:
* https://github.com/ConservationInternational/Wildlife-Insights----Data-Migration
* https://data.naturalsciences.org/wildlife-insights/taxonomy/search
"""
#%% Imports
import csv
import json
import os
from collections import defaultdict

import pandas as pd
#%% Main wrapper
def main(input_file=None,taxonomy_file=None,templates_dir=None,output_base=None): # noqa
    """
    Converts a COCO Camera Traps .json file to the Wildlife Insights batch upload
    format, writing one .csv file each for the project, camera, deployment, and
    image tables.

    Every argument is optional; an argument left as None falls back to the demo
    path hard-coded in the "Paths" cell, so calling main() with no arguments uses
    those paths.  The project, camera, and deployment metadata are hard-coded in
    the "Project information" cell.

    Args:
        input_file (str, optional): COCO Camera Traps .json file; this function
            reads its 'categories', 'annotations', and 'images' lists.
        taxonomy_file (str, optional): .json file with a 'url_base' string and a
            'taxonomy' dict mapping each category name in [input_file] to a dict
            with exactly seven WI taxonomy fields.
        templates_dir (str, optional): folder containing the four WI batch upload
            template .csv files named in the "Constants" cell.
        output_base (str, optional): folder to which the output .csv files are
            written; created if it does not exist.
    """

    #%% Paths

    # A COCO camera traps file with information about this dataset
    if input_file is None:
        input_file = r'c:\temp\camera_trap_images_no_people\bellevue_camera_traps.2020-12-26.json'

    # A .json dictionary mapping common names in this dataset to dictionaries with the
    # WI taxonomy fields: common_name, wi_taxon_id, class, order, family, genus, species
    if taxonomy_file is None:
        taxonomy_file = r'c:\temp\camera_trap_images_no_people\bellevue_camera_traps_to_wi.json'

    # The folder where the .csv template files live
    if templates_dir is None:
        templates_dir = r'c:\temp\wi_batch_upload_templates'

    # The folder to which you want to write WI-formatted .csv files
    if output_base is None:
        output_base = r'c:\temp\wi_output'


    #%% Path validation

    assert os.path.isfile(input_file), 'Input file {} not found'.format(input_file)
    assert os.path.isfile(taxonomy_file), 'Taxonomy file {} not found'.format(taxonomy_file)
    assert os.path.isdir(templates_dir), 'Template folder {} not found'.format(templates_dir)
    os.makedirs(output_base,exist_ok = True)


    #%% Constants

    projects_file_name = 'Template Wildlife Insights Batch Upload - Projectv1.0.csv'
    deployments_file_name = 'Template Wildlife Insights Batch Upload - Deploymentv1.0.csv'
    images_file_name = 'Template Wildlife Insights Batch Upload - Imagev1.0.csv'
    cameras_file_name = 'Template Wildlife Insights Batch Upload - Camerav1.0.csv'

    for fn in [projects_file_name,deployments_file_name,images_file_name,cameras_file_name]:
        assert os.path.isfile(os.path.join(templates_dir,fn)), \
            'Template file {} not found in {}'.format(fn,templates_dir)


    #%% Project information

    project_info = {}
    project_info['project_name'] = 'Bellevue Camera Traps'
    project_info['project_id'] = 'bct_001'
    project_info['project_short_name'] = 'BCT'
    project_info['project_objectives'] = 'none'
    project_info['project_species'] = 'Multiple'
    project_info['project_species_individual'] = ''
    project_info['project_sensor_layout'] = 'Convenience'
    project_info['project_sensor_layout_targeted_type'] = ''
    project_info['project_bait_use'] = 'No'
    project_info['project_bait_type'] = 'None'
    project_info['project_stratification'] = 'No'
    project_info['project_stratification_type'] = ''
    project_info['project_sensor_method'] = 'Sensor Detection'
    project_info['project_individual_animals'] = 'No'
    project_info['project_admin'] = 'Dan Morris'
    project_info['project_admin_email'] = 'cameratraps@lila.science'
    project_info['country_code'] = 'USA'
    project_info['embargo'] = str(0)
    project_info['initiative_id'] = ''
    project_info['metadata_license'] = 'CC0'
    project_info['image_license'] = 'CC0'
    project_info['project_blank_images'] = 'No'
    project_info['project_sensor_cluster'] = 'No'

    camera_info = {}
    camera_info['project_id'] = project_info['project_id']
    camera_info['camera_id'] = '0000'
    camera_info['make'] = ''
    camera_info['model'] = ''
    camera_info['serial_number'] = ''
    camera_info['year_purchased'] = ''

    deployment_info = {}
    deployment_info['project_id'] = project_info['project_id']
    deployment_info['deployment_id'] = 'test_deployment'
    deployment_info['subproject_name'] = 'test_subproject'
    deployment_info['subproject_design'] = ''
    deployment_info['placename'] = 'yard'
    # Latitude must lie in [-90,90]; these two values were previously swapped
    deployment_info['longitude'] = '-122.2015'
    deployment_info['latitude'] = '47.6101'
    deployment_info['start_date'] = '2016-01-01 00:00:00'
    deployment_info['end_date'] = '2026-01-01 00:00:00'
    deployment_info['event_name'] = ''
    deployment_info['event_description'] = ''
    deployment_info['event_type'] = ''
    deployment_info['bait_type'] = ''
    deployment_info['bait_description'] = ''
    deployment_info['feature_type'] = 'None'
    deployment_info['feature_type_methodology'] = ''
    deployment_info['camera_id'] = camera_info['camera_id']
    deployment_info['quiet_period'] = str(60)
    deployment_info['camera_functioning'] = 'Camera Functioning'
    deployment_info['sensor_height'] = 'Chest height'
    deployment_info['height_other'] = ''
    deployment_info['sensor_orientation'] = 'Parallel'
    deployment_info['orientation_other'] = ''
    deployment_info['recorded_by'] = 'Dan Morris'

    image_info = {}
    image_info['identified_by'] = 'Dan Morris'


    #%% Read templates

    def parse_fields(templates_dir,file_name):
        """
        Reads the column names from a template .csv file, which is expected to
        contain exactly one non-blank row (the header).

        Args:
            templates_dir (str): folder containing the template
            file_name (str): template file name within [templates_dir]

        Returns:
            list: the column names, in template order
        """

        with open(os.path.join(templates_dir,file_name),'r',newline='') as f:
            # Ignore rows that contain nothing but separators and whitespace
            rows = [row for row in csv.reader(f) if any(cell.strip() for cell in row)]
        assert len(rows) == 1, 'Error processing template {}'.format(file_name)
        fields = [s.strip() for s in rows[0]]
        print('Parsed {} columns from {}'.format(len(fields),file_name))
        return fields

    projects_fields = parse_fields(templates_dir,projects_file_name)
    deployments_fields = parse_fields(templates_dir,deployments_file_name)
    images_fields = parse_fields(templates_dir,images_file_name)
    cameras_fields = parse_fields(templates_dir,cameras_file_name)


    #%% Compare dictionary to template lists

    def compare_info_to_template(info,template_fields,name):
        """
        Asserts that the keys of [info] and the columns in [template_fields] are
        the same set; [name] is only used in the error messages.
        """

        for s in info.keys():
            assert s in template_fields,'Field {} not specified in {}_fields'.format(s,name)
        for s in template_fields:
            assert s in info.keys(),'Field {} not specified in {}_info'.format(s,name)

    def write_table(file_name,info,template_fields):
        """
        Writes a two-row .csv file (header, then the values from [info] in
        template column order) to [file_name] within the output folder.
        """

        assert len(info) == len(template_fields)
        output_file = os.path.join(output_base,file_name)

        # newline='' lets the csv module control line endings and quoting; values
        # containing commas or quotes are escaped rather than corrupting the row.
        with open(output_file,'w',newline='') as f:
            writer = csv.writer(f,lineterminator=os.linesep)
            writer.writerow(template_fields)
            writer.writerow([info[s] for s in template_fields])


    #%% Project file

    compare_info_to_template(project_info,projects_fields,'project')
    write_table(projects_file_name,project_info,projects_fields)


    #%% Camera file

    compare_info_to_template(camera_info,cameras_fields,'camera')
    write_table(cameras_file_name,camera_info,cameras_fields)


    #%% Deployment file

    compare_info_to_template(deployment_info,deployments_fields,'deployment')
    write_table(deployments_file_name,deployment_info,deployments_fields)


    #%% Images file

    # Read .json file with image information
    with open(input_file,'r') as f:
        input_data = json.load(f)

    # Read taxonomy dictionary
    with open(taxonomy_file,'r') as f:
        taxonomy_mapping = json.load(f)

    url_base = taxonomy_mapping['url_base']
    taxonomy_mapping = taxonomy_mapping['taxonomy']

    # Map each image ID to the category names of its annotations
    category_id_to_name = {cat['id']:cat['name'] for cat in input_data['categories']}
    image_id_to_annotations = defaultdict(list)

    for annotation in input_data['annotations']:
        image_id_to_annotations[annotation['image_id']].append(
            category_id_to_name[annotation['category_id']])

    expected_image_fields = set(images_fields)
    rows = []

    for im in input_data['images']:

        row = {}

        url = url_base + im['file_name'].replace('\\','/')
        row['project_id'] = project_info['project_id']
        row['deployment_id'] = deployment_info['deployment_id']
        row['image_id'] = im['id']
        row['location'] = url
        row['identified_by'] = image_info['identified_by']

        # This script only supports images with exactly one annotation
        category_names = image_id_to_annotations[im['id']]
        assert len(category_names) == 1, \
            'Image {} has {} annotations, expected 1'.format(im['id'],len(category_names))
        category_name = category_names[0]

        taxon_info = taxonomy_mapping[category_name]
        assert len(taxon_info.keys()) == 7
        for s in taxon_info.keys():
            row[s] = taxon_info[s]

        # We don't have counts, but we can differentiate between zero and 1
        if category_name == 'empty':
            row['number_of_objects'] = 0
        else:
            row['number_of_objects'] = 1

        assert isinstance(im['datetime'],str)

        row['uncertainty'] = None
        row['timestamp'] = im['datetime']
        row['highlighted'] = 0
        row['age'] = None
        row['sex'] = None
        row['animal_recognizable'] = 'No'
        row['individual_id'] = None
        row['individual_animal_notes'] = None
        row['markings'] = None

        # Compare names, not just counts, so a misnamed column can't slip through
        row_fields = set(row.keys())
        assert row_fields == expected_image_fields, \
            'Image fields do not match template: missing {}, unexpected {}'.format(
                sorted(expected_image_fields - row_fields),
                sorted(row_fields - expected_image_fields))

        rows.append(row)

    # ...for each image

    # Passing [columns] writes the columns in template order (and still writes a
    # header when there are no images), rather than in dict insertion order.
    df = pd.DataFrame(rows,columns=images_fields)
    df.to_csv(os.path.join(output_base,images_file_name),index=False)

# ...main()
#%% Command-line driver
# Run the conversion with the default (hard-coded) settings when this file is
# executed as a script; importing the module does not trigger a conversion.
if __name__ == '__main__':
    main()