solution_235a.py

#!/usr/bin/python3
# ====================================================================
# generate Huffman codes
#
# 1. start with a list of symbols and a list
#    of symbol frequencies
#
# 2. create a dictionary with the keys being the symbols
#    and the values their Huffman codes
#
# ====================================================================
# from: www.geeksforgeeks.org/huffman-coding-in-python/
# see : docs.python.org/3/library/heapq.html
# ====================================================================

import sys
import heapq

# --------------------------------------------------------------------
# ---- define the tree node class
# --------------------------------------------------------------------

class Node:

    def __init__(self, symbol=None, frequency=None):
        self.symbol    = symbol
        self.frequency = frequency
        self.left      = None
        self.right     = None

    def __lt__(self,other):
        return self.frequency < other.frequency

    def __str__(self):
        return f'symbol={self.symbol} frequency={self.frequency}'

# --------------------------------------------------------------------
# ---- build huffman tree from symbols (characters) and frequencies
# ---- return the root node of the tree
# --------------------------------------------------------------------

def build_huffman_tree(chars, freqs):

    # ---- create priority queue of nodes

    priority_queue = [Node(c,f) for c,f in zip(chars,freqs)]
    heapq.heapify(priority_queue)

    # ---- build the huffman tree

    while len(priority_queue) > 1:
        left_child  = heapq.heappop(priority_queue)
        right_child = heapq.heappop(priority_queue)
        merged_node = Node(frequency=left_child.frequency+right_child.frequency)
        merged_node.left  = left_child
        merged_node.right = right_child
        heapq.heappush(priority_queue,merged_node)
    
    return priority_queue[0]

# --------------------------------------------------------------------
# ---- generate huffman codes (one for each symbol/character)
# --------------------------------------------------------------------

def generate_huffman_codes(node, code='', huffman_codes={}):

    if node is not None:

        if node.symbol is not None:    
            huffman_codes[node.symbol] = code

        generate_huffman_codes(node.left,  code + '0', huffman_codes)
        generate_huffman_codes(node.right, code + '1', huffman_codes)

    return huffman_codes

# --------------------------------------------------------------------
# ---- print node information
# --------------------------------------------------------------------

def print_node(node, title=None):    
    if title: print(title)
    nleft  = node.left
    if nleft is not None:
        nleft = nleft.symbol        
    nright = node.right
    if nright is not None:
        nright = nright.symbol    
    print(f'sym={node.symbol} freq={node.frequency:<5} '+\
              f'left={nleft} right={nright}')

# --------------------------------------------------------------------
# ----
# --------------------------------------------------------------------

def print_priority_queue(priority_queue, title=None):
    if title: print(title)
    for node in priority_queue:
        print_node(node)

# --------------------------------------------------------------------
# ---- 
# --------------------------------------------------------------------

def print_huffman_codes(codes, title=None):
    if title is not None: print(title)
    for k,v in codes.items():
        print(f'char: {k} code: {v}')

# --------------------------------------------------------------------
# ---- main
# --------------------------------------------------------------------

# ---- example from website

#chars = ['a','b','c','d','e','f']   # characters
#freqs = [  4,  7, 15, 17, 22, 42]   # frequencies

# ---- example from my problem description

chars = [ ' ','a','e','f','h','i','m','n',
          's','t','l','o','p','r','u','x' ]
freqs = [ 7,4,4,3,2,2,2,2,2,2,1,1,1,1,1,1 ]

if len(chars) != len(freqs):
    print()
    print('Error: chars and freqs data are different sizes')
    print()
    sys.exit()

# ---- build_huffman tree

print()
print('build huffman tree')
root = build_huffman_tree(chars,freqs)

# ---- generate huffman codes

print()
print('generate huffman codes')
huffman_codes = generate_huffman_codes(root)

# ---- display huffman codes

print()
print_huffman_codes(huffman_codes,'Huffman Codes')

# ---- write hufman codes to a file so they can be used
# ---- by other programs to encode and decode data