# solution_218a.py

#!/usr/bin/python3
# ====================================================================
# compare regular file checksums in two different directories
# ====================================================================

import os
import hashlib

# ---- hard-coded paths of the two directories whose files are compared
dir_old = 'd:/abc'    # reported as the "old dir" by the main section
dir_new = 'd:/xyz'    # reported as the "new dir" by the main section

# --------------------------------------------------------------------
# ---- get a list of the filenames in a directory
# --------------------------------------------------------------------

def get_list_of_files(dir):
    """Return the sorted names of the regular files directly inside *dir*.

    Only bare filenames are returned (no directory prefix). Entries that
    are not regular files (e.g. subdirectories) and symbolic links are
    skipped. The directory is not searched recursively.
    """

    def is_plain_file(name):
        # ---- a regular file that is not a symbolic link
        full_path = dir + '/' + name          # path + filename
        return os.path.isfile(full_path) and not os.path.islink(full_path)

    return sorted(name for name in os.listdir(dir) if is_plain_file(name))

# --------------------------------------------------------------------
# ---- get file (path+name) MD5 checksum
# --------------------------------------------------------------------

def file_md5_checksum(filepath):

    with open(filepath, 'rb') as bfile:
        data = bfile.read()
    cksum = hashlib.md5(data).hexdigest()
    return cksum

# --------------------------------------------------------------------
# ---- compare file MD5 checksums (file is path+name)
# --------------------------------------------------------------------

def compare_checksums(file1,file2):
    """Return True when the two files have identical MD5 checksums.

    file1 and file2 are file paths (directory + name). The checksum of
    file1 is computed first, then the checksum of file2.
    """
    return file_md5_checksum(file1) == file_md5_checksum(file2)

# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

if __name__ == '__main__':

    # ---- list the regular files found in each directory

    old_dir_files = get_list_of_files(dir_old)
    new_dir_files = get_list_of_files(dir_new)

    print()
    print(f'{len(old_dir_files)} files found in old dir ({dir_old})')
    print(f'{len(new_dir_files)} files found in new dir ({dir_new})')

    # ---- build the lookup set once so each membership test in the
    # ---- loop below is constant time instead of a list scan

    new_dir_filenames = set(new_dir_files)

    # ---- compare MD5 checksums of files present in both directories

    checksum_match_count    = 0
    checksum_no_match_count = 0

    print()
    for filename in old_dir_files:

        # ---- a file with no same-named file in the new directory
        # ---- is not compared and not counted

        if filename not in new_dir_filenames:
            continue

        pathname_new = dir_new + '/' + filename
        pathname_old = dir_old + '/' + filename

        if compare_checksums(pathname_new, pathname_old):
            checksum_match_count += 1
            continue

        # ---- checksums do not match: report with a running number

        checksum_no_match_count += 1

        print(f'[{checksum_no_match_count:03}] MD5 no match {filename}')

    print()
    print(f'checksum    match count is {checksum_match_count}')
    print(f'checksum no match count is {checksum_no_match_count}')
    print()