#!/usr/bin/python3
# ====================================================================
# compare regular file checksums in two different directories
# ====================================================================

import os
import hashlib

dir_old = 'd:/abc'
dir_new = 'd:/xyz'

# ---- number of bytes read per iteration while hashing a file, so
# ---- large files are never held in memory all at once
_READ_CHUNK_SIZE = 64 * 1024


# --------------------------------------------------------------------
# ---- get a list of the filenames in a directory
# --------------------------------------------------------------------

def get_list_of_files(dir):
    """Return the sorted names of the regular files directly inside *dir*.

    Only the bare filenames are returned (no path component).
    Sub-directories and symbolic links are skipped; the directory is
    not searched recursively.

    Raises whatever ``os.listdir`` raises (e.g. ``FileNotFoundError``)
    when *dir* cannot be listed.
    """
    files = []

    for name in os.listdir(dir):

        path = os.path.join(dir, name)

        # isfile() follows symbolic links, so a link that points at a
        # regular file must be rejected explicitly
        if os.path.isfile(path) and not os.path.islink(path):
            files.append(name)

    files.sort()

    return files


# --------------------------------------------------------------------
# ---- get file (path+name) MD5 checksum
# --------------------------------------------------------------------

def file_md5_checksum(filepath):
    """Return the MD5 checksum of the file at *filepath* as a hex string.

    The file is opened in binary mode and fed to the hash in
    fixed-size chunks, so memory use does not grow with file size.

    Raises ``OSError`` (e.g. ``FileNotFoundError``) when the file
    cannot be opened or read.
    """
    md5 = hashlib.md5()

    with open(filepath, 'rb') as bfile:
        # iter() with a sentinel stops at the empty bytes object that
        # read() returns at end-of-file
        for chunk in iter(lambda: bfile.read(_READ_CHUNK_SIZE), b''):
            md5.update(chunk)

    return md5.hexdigest()


# --------------------------------------------------------------------
# ---- compare file MD5 checksums (file is path+name)
# --------------------------------------------------------------------

def compare_checksums(file1, file2):
    """Return True when both files have the same MD5 checksum."""
    return file_md5_checksum(file1) == file_md5_checksum(file2)


# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

if __name__ == '__main__':

    old_dir_files = get_list_of_files(dir_old)
    new_dir_files = get_list_of_files(dir_new)

    print()
    print(f'{len(old_dir_files)} files found in old dir ({dir_old})')
    print(f'{len(new_dir_files)} files found in new dir ({dir_new})')

    # ---- build the lookup set once so that each membership test in
    # ---- the loop below does not rescan the whole list
    new_dir_names = set(new_dir_files)

    # ---- compare MD5 checksums

    checksum_match_count = 0
    checksum_no_match_count = 0
    filename_match_count = 0    # counted but not currently reported

    print()

    for filename in old_dir_files:

        # ---- only files present in both directories are compared;
        # ---- files found only in the old directory are skipped
        if filename not in new_dir_names:
            continue

        filename_match_count += 1

        pathname_new = os.path.join(dir_new, filename)
        pathname_old = os.path.join(dir_old, filename)

        if compare_checksums(pathname_new, pathname_old):
            checksum_match_count += 1
            continue

        # ---- checksums do not match

        checksum_no_match_count += 1

        print(f'[{checksum_no_match_count:03}] MD5 no match {filename}')

    print()
    print(f'checksum match count is {checksum_match_count}')
    print(f'checksum no match count is {checksum_no_match_count}')
    print()