"""
top_folders_to_bottom.py
Given a base folder with files like:
* A/1/2/a.jpg
* B/3/4/b.jpg
...moves the top-level folders to the bottom in a new output folder, i.e., creates:
* 1/2/A/a.jpg
* 3/4/B/b.jpg
In practice, this is used to make this:
animal/camera01/image01.jpg
...look like:
camera01/animal/image01.jpg
"""
#%% Constants and imports
import os
import sys
import shutil
import argparse
from pathlib import Path
from tqdm import tqdm
from functools import partial
from multiprocessing.pool import ThreadPool
from megadetector.utils.path_utils import path_is_abs
#%% Classes
[docs]
class TopFoldersToBottomOptions:
"""
Options used to parameterize top_folders_to_bottom()
"""
def __init__(self,
input_folder,
output_folder,
copy=True,
n_threads=1,
overwrite=False):
#: Whether to copy (True) vs. move (False) false when re-organizing
self.copy = copy
#: Number of worker threads to use, or <1 to disable parallelization
self.n_threads = n_threads
#: Input folder
self.input_folder = input_folder
#: Output folder
self.output_folder = output_folder
#: If this is False and an output file exists, throw an error
self.overwrite = overwrite
#%% Main functions
def _process_file(relative_filename,options,execute=True):
assert ('/' in relative_filename) and \
('\\' not in relative_filename) and \
(not path_is_abs(relative_filename))
# Find top-level folder
tokens = relative_filename.split('/')
topmost_folder = tokens.pop(0)
tokens.insert(len(tokens)-1,topmost_folder)
# Find file/folder names
output_relative_path = '/'.join(tokens)
output_relative_folder = '/'.join(tokens[0:-1])
output_absolute_folder = os.path.join(options.output_folder,output_relative_folder)
output_absolute_path = os.path.join(options.output_folder,output_relative_path)
if execute:
os.makedirs(output_absolute_folder,exist_ok=True)
input_absolute_path = os.path.join(options.input_folder,relative_filename)
if not options.overwrite:
assert not os.path.isfile(output_absolute_path), \
'Error: output file {} exists'.format(output_absolute_path)
# Move or copy
if options.copy:
shutil.copy(input_absolute_path, output_absolute_path)
else:
shutil.move(input_absolute_path, output_absolute_path)
return output_absolute_path
# ...def _process_file()
[docs]
def top_folders_to_bottom(options):
"""
top_folders_to_bottom.py
Given a base folder with files like:
* A/1/2/a.jpg
* B/3/4/b.jpg
...moves the top-level folders to the bottom in a new output folder, i.e., creates:
* 1/2/A/a.jpg
* 3/4/B/b.jpg
In practice, this is used to make this:
animal/camera01/image01.jpg
...look like:
camera01/animal/image01.jpg
Args:
options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.
"""
os.makedirs(options.output_folder,exist_ok=True)
# Enumerate input folder
print('Enumerating files...')
files = list(Path(options.input_folder).rglob('*'))
files = [p for p in files if not p.is_dir()]
files = [str(s) for s in files]
print('Enumerated {} files'.format(len(files)))
# Convert absolute paths to relative paths
relative_files = [os.path.relpath(s,options.input_folder) for s in files]
# Standardize delimiters
relative_files = [s.replace('\\','/') for s in relative_files]
base_files = [s for s in relative_files if '/' not in s]
if len(base_files) > 0:
print('Warning: ignoring {} files in the base folder'.format(len(base_files)))
relative_files = [s for s in relative_files if '/' in s]
# Make sure each input file maps to a unique output file
absolute_output_files = [_process_file(s, options, execute=False) for s in relative_files]
assert len(absolute_output_files) == len(set(absolute_output_files)),\
"Error: input filenames don't map to unique output filenames"
# relative_filename = relative_files[0]
# Loop
if options.n_threads <= 1:
for relative_filename in tqdm(relative_files):
_process_file(relative_filename,options)
else:
print('Starting a pool with {} threads'.format(options.n_threads))
pool = ThreadPool(options.n_threads)
try:
process_file_with_options = partial(_process_file, options=options)
_ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
finally:
pool.close()
pool.join()
print('Pool closed and join for folder inversion')
# ...def top_folders_to_bottom(...)
#%% Interactive driver
if False:
pass
#%%
input_folder = r"G:\temp\output"
output_folder = r"G:\temp\output-inverted"
options = TopFoldersToBottomOptions(input_folder,output_folder,copy=True,n_threads=10)
#%%
top_folders_to_bottom(options)
#%% Command-line driver
# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 10
def main(): # noqa
parser = argparse.ArgumentParser()
parser.add_argument('input_folder', type=str, help='Input image folder')
parser.add_argument('output_folder', type=str, help='Output image folder')
parser.add_argument('--copy', action='store_true',
help='Copy images, instead of moving (moving is the default)')
parser.add_argument('--overwrite', action='store_true',
help='Allow image overwrite (default=False)')
parser.add_argument('--n_threads', type=int, default=1,
help='Number of threads to use for parallel operation (default=1)')
if len(sys.argv[1:])==0:
parser.print_help()
parser.exit()
args = parser.parse_args()
# Convert to an options object
options = TopFoldersToBottomOptions(
args.input_folder,
args.output_folder,
copy=args.copy,
n_threads=args.n_threads,
overwrite=args.overwrite)
top_folders_to_bottom(options)
if __name__ == '__main__':
main()