unused_files.py

#

Find (and optionally delete) files in the source directory whose names do not appear in the contents of any file in the target directory.

import logging
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from pathlib import Path

# Root-logger setup: INFO-level records with a timestamp and the emitting
# file/line, sent to a stream handler. Messages from the ``warnings`` module
# are redirected into logging as well.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(filename)s:%(lineno)d - %(message)s",
    handlers=[logging.StreamHandler()],
)
logging.captureWarnings(True)
#
def parse_args(argv=None):
    """Parse the command-line options of this script.

    The parser description is taken from the module ``__doc__``.

    Args:
        argv: Optional sequence of argument strings to parse instead of the
            process command line. ``None`` (the default) keeps the previous
            behaviour of letting argparse read ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with the attributes ``source``, ``target``,
        ``delete`` and ``verbose``.
    """
    parser = ArgumentParser(description=__doc__, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s", "--source", required=True, help="Source directory")
    parser.add_argument("-t", "--target", required=True, help="Target directory")
    parser.add_argument("-d", "--delete", action="store_true", help="Delete unused files")
    parser.add_argument("-v", "--verbose", action="store_true")
    # Forwarding argv makes the parser callable from code and tests.
    return parser.parse_args(argv)
#
def file_name_is_used_in(files_in_target, file_name):
    """Tell whether ``file_name`` occurs in the text of any target file.

    Args:
        files_in_target: Iterable of path objects to search; entries that are
            not regular files are skipped.
        file_name: Name to look for as a substring of each file's contents.

    Returns:
        True as soon as one file containing ``file_name`` is found, otherwise
        False (including when ``files_in_target`` is empty).
    """
    return any(
        candidate.is_file() and file_name in file_contents(candidate)
        for candidate in files_in_target
    )
#
def file_contents(target_file):
    """Return the text of ``target_file``, or ``""`` if it cannot be decoded.

    ``read_text()`` is called without an explicit encoding. A file whose bytes
    raise ``UnicodeDecodeError`` is treated as having no text, so it never
    matches a file name.

    Args:
        target_file: Path-like object exposing ``read_text()``.

    Returns:
        The decoded contents, or an empty string on a decode failure.
    """
    try:
        text = target_file.read_text()
    except UnicodeDecodeError:
        text = ""
    return text
#
def main(args):
    """Log, and optionally delete, source files whose names no target file contains.

    Args:
        args: Mapping with the keys ``"source"`` and ``"target"`` (directories
            that are searched recursively) and ``"delete"`` (truthy to unlink
            every unused file that is found).
    """
    source_dir, target_dir, delete_file = args["source"], args["target"], args["delete"]
    logging.info(f"Find/delete({'y' if delete_file else 'n'}) files in {source_dir} which are not used in {target_dir}")

    source_entries = Path(source_dir).glob("**/*")
    # The target listing is materialised once because it is scanned again for
    # every source file.
    target_entries = list(Path(target_dir).glob("**/*"))

    # Collect every unused file before deleting anything.
    unused_files = []
    for source in source_entries:
        if not source.is_file():
            continue
        if file_name_is_used_in(target_entries, source.name):
            continue
        unused_files.append(source)

    for unused_file in unused_files:
        logging.info(f"{unused_file.name} is unused in {target_dir}")
        if delete_file:
            unused_file.unlink()

    logging.info(f"{len(unused_files)} files {'deleted' if delete_file else 'found'}")


if __name__ == "__main__":
    # main() indexes its argument like a dict, so unwrap the argparse namespace.
    args = parse_args()
    main(vars(args))