#!/usr/bin/python3
# ====================================================================
# compare regular file checksums in two different directories
# ====================================================================
import os
import hashlib
dir_old = 'd:/abc'
dir_new = 'd:/xyz'
# --------------------------------------------------------------------
# ---- get a list of the filenames in a directory
# --------------------------------------------------------------------
def get_list_of_files(dir):
    """Return the sorted names of the regular files directly in a directory.

    Only the immediate entries of the directory are examined (no
    recursion). Sub-directories and symbolic links are skipped, so a
    link that points at a regular file is not reported.

    Args:
        dir: Path of the directory to list. (The name shadows the
            built-in ``dir``; it is kept so existing callers are
            unaffected.)

    Returns:
        A list of file names (without the directory part), sorted in
        ascending order.

    Raises:
        OSError: If the directory cannot be listed.
    """
    def is_regular_file(name):
        # os.path.join copes with a trailing separator on ``dir``
        path = os.path.join(dir, name)
        # isfile() follows links, so links must be excluded explicitly
        return os.path.isfile(path) and not os.path.islink(path)

    return sorted(name for name in os.listdir(dir) if is_regular_file(name))
# --------------------------------------------------------------------
# ---- get file (path+name) MD5 checksum
# --------------------------------------------------------------------
def file_md5_checksum(filepath):
    """Return the MD5 checksum of a file as a hexadecimal string.

    The file is opened in binary mode and fed to the hash in
    fixed-size chunks, so a large file is never held in memory in
    full.

    Args:
        filepath: Path (directory + name) of the file to hash.

    Returns:
        The hexadecimal MD5 digest of the file's contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024  # 1 MiB per read keeps memory use bounded
    md5 = hashlib.md5()
    with open(filepath, 'rb') as bfile:
        # iter() with a sentinel stops on the empty bytes object that
        # read() returns at end of file
        for chunk in iter(lambda: bfile.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()
# --------------------------------------------------------------------
# ---- compare file MD5 checksums (file is path+name)
# --------------------------------------------------------------------
def compare_checksums(file1,file2):
    """Tell whether two files have the same MD5 checksum.

    Args:
        file1: Path (directory + name) of the first file.
        file2: Path (directory + name) of the second file.

    Returns:
        True when both checksums are equal, otherwise False.
    """
    return file_md5_checksum(file1) == file_md5_checksum(file2)
# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------
if __name__ == '__main__':

    # ---- list the regular files (names only) found in each directory

    old_dir_files = get_list_of_files(dir_old)
    new_dir_files = get_list_of_files(dir_new)

    print()
    print(f'{len(old_dir_files)} files found in old dir ({dir_old})')
    print(f'{len(new_dir_files)} files found in new dir ({dir_new})')

    # ---- a set gives constant-time membership tests; testing against
    # ---- the list would rescan it for every file in the old directory

    new_dir_names = set(new_dir_files)

    # ---- compare MD5 checksums

    checksum_match_count = 0
    checksum_no_match_count = 0
    filename_match_count = 0    # files present in both dirs (not reported)

    print()

    for filename in old_dir_files:

        # ---- only files that exist in both directories are compared

        if filename not in new_dir_names:
            continue

        filename_match_count += 1

        pathname_new = os.path.join(dir_new, filename)
        pathname_old = os.path.join(dir_old, filename)

        if compare_checksums(pathname_new, pathname_old):
            checksum_match_count += 1
            continue

        # ---- checksums do not match

        checksum_no_match_count += 1

        print(f'[{checksum_no_match_count:03}] MD5 no match {filename}')

    print()
    print(f'checksum match count is {checksum_match_count}')
    print(f'checksum no match count is {checksum_no_match_count}')
    print()