#!/usr/bin/env python3

# usage: metasort SRCDIR DESTDIR

# this script iterates through every file in SRCDIR, copying each one into a
# YYYY/MM/DD folder structure rooted in DESTDIR.
# EXIF data is read from .JPG images. files whose basenames match a .JPG image
# (e.g. .JPG.xmp sidecars, .ARW raws) are copied alongside their matching .JPG.

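# example (made-up paths and filenames): `metasort /media/sdcard/DCIM ~/photos`
# would copy DSC0001.JPG taken on 2021-06-15, plus DSC0001.JPG.xmp and
# DSC0001.ARW if present, into ~/photos/2021/06/15/.
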
# TODO:
#
# copy images and associated files into folders

import os
import sys
import re
import pathlib
import shutil
import exifread
import datetime

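# note: exifread is a third-party package (install with `pip install exifread`);
# the other imports are from the standard library.
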
# parse and validate arguments

if len(sys.argv) != 3:
    print("usage: metasort SRCDIR DESTDIR")
    sys.exit(1)

src_dir = sys.argv[1]
dest_dir = sys.argv[2]

def check_is_dir(path):
    if not os.path.isdir(path):
        print("{} is not a directory".format(path))
        sys.exit(1)

check_is_dir(src_dir)
check_is_dir(dest_dir)

# arguments ok!

# collect the names of the files sitting directly in src_dir (subdirectories are not recursed into)
src_filenames = next(os.walk(src_dir), (None, None, []))[2]
src_filenames.sort()

jpg_pattern = re.compile(r"^(.+)(\.JPG)$")
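# note: only files with an uppercase ".JPG" extension are treated as source images;
# passing re.IGNORECASE to re.compile would also match ".jpg".
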
# jpgs we could not read a DateTime tag from
bad_jpgs = []

def get_all_files_for_name(filename):
    # return filename plus every non-JPG file in src_dir that shares its stem
    # (so a .JPG picks up its .JPG.xmp and .ARW companions)
    stem = pathlib.Path(filename).stem
    stem_pattern = re.compile(r"^({})(\..+)$".format(re.escape(stem)))

    files = []
    files.append(filename)

    for other_filename in src_filenames:

        if jpg_pattern.match(other_filename):
            continue

        if stem_pattern.match(other_filename):
            files.append(other_filename)

    return files

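# for example (hypothetical filenames), if src_filenames contains
# ["DSC0001.ARW", "DSC0001.JPG", "DSC0001.JPG.xmp", "DSC0002.JPG"], then
# get_all_files_for_name("DSC0001.JPG") returns
# ["DSC0001.JPG", "DSC0001.ARW", "DSC0001.JPG.xmp"].
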
copied = 0
for src_filename in src_filenames:

    if not jpg_pattern.match(src_filename):
        continue

    # read exif properties
    path = os.path.join(src_dir, src_filename)
    with open(path, 'rb') as jpg_file:
        tags = exifread.process_file(jpg_file)
    date_tag = "Image DateTime"
    if date_tag not in tags:
        bad_jpgs.append(src_filename)
        continue

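    # EXIF stores timestamps as "YYYY:MM:DD HH:MM:SS" (e.g. "2021:06:15 14:03:07",
    # a made-up example), which is what the format string below expects.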
    date = datetime.datetime.strptime(str(tags[date_tag]), "%Y:%m:%d %H:%M:%S")

    # make a directory for this date (month and day are zero-padded so folders sort nicely)
    date_dir = os.path.join(dest_dir, str(date.year), f'{date.month:02}', f'{date.day:02}')
    pathlib.Path(date_dir).mkdir(parents=True, exist_ok=True)

    files = get_all_files_for_name(src_filename)

    print("\n{}\t{}\t{}\t{}\n".format(src_filename, len(files), date, date_dir))

    # copy all files into date_dir
    for f in files:
        f_src = os.path.join(src_dir, f)
        f_dest = os.path.join(date_dir, f)

        print(f"{f_src}\t->\t{f_dest}")
        shutil.copy2(f_src, f_dest)
        copied += 1

# deal with bad files
bad_dir = os.path.join(dest_dir, "corrupted")
bad_files = []
for bad_jpg in bad_jpgs:
    for bad_file in get_all_files_for_name(bad_jpg):
        bad_files.append(bad_file)

if len(bad_files) > 0:
    print("could not read metadata from {} jpgs:\n".format(len(bad_jpgs)))
    print(", ".join(bad_jpgs))
    print("\nin total, {} corrupt files were found:\n".format(len(bad_files)))
    print(", ".join(bad_files))
    print("\nthese will be copied to {}. maybe they can be fixed eventually 😔".format(bad_dir))

    pathlib.Path(bad_dir).mkdir(parents=True, exist_ok=True)

    # copy bad files to corrupted dir
    for bad_file in bad_files:
        bad_file_src = os.path.join(src_dir, bad_file)
        bad_file_dest = os.path.join(bad_dir, bad_file)
        shutil.copy2(bad_file_src, bad_file_dest)
        copied += 1

print(f"\nall done! {copied} files were copied in total. feel free to delete files in {src_dir}. have a nice day!")