#!/usr/bin/python3
# ====================================================================
# generate Huffman codes
#
# 1. start with a list of symbols and a list
# of symbol frequencies
#
# 2. create a dictionary with the keys being the symbols
# and the values their Huffman codes
#
# ====================================================================
# from: www.geeksforgeeks.org/huffman-coding-in-python/
# see : docs.python.org/3/library/heapq.html
# ====================================================================
import sys
import heapq
# --------------------------------------------------------------------
# ---- define the tree node class
# --------------------------------------------------------------------
class Node:
def __init__(self, symbol=None, frequency=None):
self.symbol = symbol
self.frequency = frequency
self.left = None
self.right = None
def __lt__(self,other):
return self.frequency < other.frequency
def __str__(self):
return f'symbol={self.symbol} frequency={self.frequency}'
# --------------------------------------------------------------------
# ---- build huffman tree from symbols (characters) and frequencies
# ---- return the root node of the tree
# --------------------------------------------------------------------
def build_huffman_tree(chars, freqs):
# ---- create priority queue of nodes
priority_queue = [Node(c,f) for c,f in zip(chars,freqs)]
heapq.heapify(priority_queue)
# ---- build the huffman tree
while len(priority_queue) > 1:
left_child = heapq.heappop(priority_queue)
right_child = heapq.heappop(priority_queue)
merged_node = Node(frequency=left_child.frequency+right_child.frequency)
merged_node.left = left_child
merged_node.right = right_child
heapq.heappush(priority_queue,merged_node)
return priority_queue[0]
# --------------------------------------------------------------------
# ---- generate huffman codes (one for each symbol/character)
# --------------------------------------------------------------------
def generate_huffman_codes(node, code='', huffman_codes={}):
if node is not None:
if node.symbol is not None:
huffman_codes[node.symbol] = code
generate_huffman_codes(node.left, code + '0', huffman_codes)
generate_huffman_codes(node.right, code + '1', huffman_codes)
return huffman_codes
# --------------------------------------------------------------------
# ---- print node information
# --------------------------------------------------------------------
def print_node(node, title=None):
if title: print(title)
nleft = node.left
if nleft is not None:
nleft = nleft.symbol
nright = node.right
if nright is not None:
nright = nright.symbol
print(f'sym={node.symbol} freq={node.frequency:<5} '+\
f'left={nleft} right={nright}')
# --------------------------------------------------------------------
# ----
# --------------------------------------------------------------------
def print_priority_queue(priority_queue, title=None):
if title: print(title)
for node in priority_queue:
print_node(node)
# --------------------------------------------------------------------
# ----
# --------------------------------------------------------------------
def print_huffman_codes(codes, title=None):
if title is not None: print(title)
for k,v in codes.items():
print(f'char: {k} code: {v}')
# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------
# ---- example from website
#chars = ['a','b','c','d','e','f'] # characters
#freqs = [ 4, 7, 15, 17, 22, 42] # frequencies
# ---- example from my problem description
chars = [ ' ','a','e','f','h','i','m','n',
's','t','l','o','p','r','u','x' ]
freqs = [ 7,4,4,3,2,2,2,2,2,2,1,1,1,1,1,1 ]
if len(chars) != len(freqs):
print()
print('Error: chars and freqs data are different sizes')
print()
sys.exit()
# ---- build_huffman tree
print()
print('build huffman tree')
root = build_huffman_tree(chars,freqs)
# ---- generate huffman codes
print()
print('generate huffman codes')
huffman_codes = generate_huffman_codes(root)
# ---- display huffman codes
print()
print_huffman_codes(huffman_codes,'Huffman Codes')
# ---- write hufman codes to a file so they can be used
# ---- by other programs to encode and decode data