#!/usr/bin/python3 # ==================================================================== # generate Huffman codes # # 1. start with a list of symbols and a list # of symbol frequencies # # 2. create a dictionary with the keys being the symbols # and the values their Huffman codes # # ==================================================================== # from: www.geeksforgeeks.org/huffman-coding-in-python/ # see : docs.python.org/3/library/heapq.html # ==================================================================== import sys import heapq # -------------------------------------------------------------------- # ---- define the tree node class # -------------------------------------------------------------------- class Node: def __init__(self, symbol=None, frequency=None): self.symbol = symbol self.frequency = frequency self.left = None self.right = None def __lt__(self,other): return self.frequency < other.frequency def __str__(self): return f'symbol={self.symbol} frequency={self.frequency}' # -------------------------------------------------------------------- # ---- build huffman tree from symbols (characters) and frequencies # ---- return the root node of the tree # -------------------------------------------------------------------- def build_huffman_tree(chars, freqs): # ---- create priority queue of nodes priority_queue = [Node(c,f) for c,f in zip(chars,freqs)] heapq.heapify(priority_queue) # ---- build the huffman tree while len(priority_queue) > 1: left_child = heapq.heappop(priority_queue) right_child = heapq.heappop(priority_queue) merged_node = Node(frequency=left_child.frequency+right_child.frequency) merged_node.left = left_child merged_node.right = right_child heapq.heappush(priority_queue,merged_node) return priority_queue[0] # -------------------------------------------------------------------- # ---- generate huffman codes (one for each symbol/character) # -------------------------------------------------------------------- def generate_huffman_codes(node, code='', huffman_codes={}): if node is not None: if node.symbol is not None: huffman_codes[node.symbol] = code generate_huffman_codes(node.left, code + '0', huffman_codes) generate_huffman_codes(node.right, code + '1', huffman_codes) return huffman_codes # -------------------------------------------------------------------- # ---- print node information # -------------------------------------------------------------------- def print_node(node, title=None): if title: print(title) nleft = node.left if nleft is not None: nleft = nleft.symbol nright = node.right if nright is not None: nright = nright.symbol print(f'sym={node.symbol} freq={node.frequency:<5} '+\ f'left={nleft} right={nright}') # -------------------------------------------------------------------- # ---- # -------------------------------------------------------------------- def print_priority_queue(priority_queue, title=None): if title: print(title) for node in priority_queue: print_node(node) # -------------------------------------------------------------------- # ---- # -------------------------------------------------------------------- def print_huffman_codes(codes, title=None): if title is not None: print(title) for k,v in codes.items(): print(f'char: {k} code: {v}') # -------------------------------------------------------------------- # ---- main # -------------------------------------------------------------------- # ---- example from website #chars = ['a','b','c','d','e','f'] # characters #freqs = [ 4, 7, 15, 17, 22, 42] # frequencies # ---- example from my problem description chars = [ ' ','a','e','f','h','i','m','n', 's','t','l','o','p','r','u','x' ] freqs = [ 7,4,4,3,2,2,2,2,2,2,1,1,1,1,1,1 ] if len(chars) != len(freqs): print() print('Error: chars and freqs data are different sizes') print() sys.exit() # ---- build_huffman tree print() print('build huffman tree') root = build_huffman_tree(chars,freqs) # ---- generate huffman codes print() print('generate huffman codes') huffman_codes = generate_huffman_codes(root) # ---- display huffman codes print() print_huffman_codes(huffman_codes,'Huffman Codes') # ---- write hufman codes to a file so they can be used # ---- by other programs to encode and decode data