dsa.huffman
Module to access functions for Huffman Compression.
1""" Module to access functions for Huffman Compression. """ 2from dsa.tree import Tree, TreeNode 3from dsa.heap import PriorityQueue 4 5def character_frequency(s: str) -> dict: 6 """ 7 Takes a string a returns a dictionary of character frequencies. 8 9 Args: 10 s (str): The string to analyze. 11 12 Returns: 13 Dictionary containing character frequency. 14 """ 15 d = {} 16 for c in s: 17 if c in d: 18 d[c] += 1 19 else: 20 d[c] = 1 21 return d 22 23def build_frequency_table(s: str) -> PriorityQueue: 24 """ 25 Accepts a string to encode and returns a heap of the characters. 26 27 Args: 28 s (str): The string to encode. 29 30 Returns: 31 A priority queue of the characters based on frequencies. 32 """ 33 frequency_dictionary = character_frequency(s) 34 35 pq = PriorityQueue() 36 37 for char, count in frequency_dictionary.items(): 38 pq.push(count, TreeNode(char))#, None, None)) 39 40 return pq 41 42def build_huffman_tree(pq: PriorityQueue) -> Tree: 43 """ 44 Accepts a priority queue and returns a Huffman Tree. 45 46 Args: 47 pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters based on frequencies. 48 49 Returns: 50 A Huffman Tree. 51 """ 52 while len(pq) > 1: 53 priority1, node1 = pq.pop_pair() 54 priority2, node2 = pq.pop_pair() 55 node = TreeNode(node1.value + node2.value, node1, node2) 56 pq.push(priority1 + priority2, node) 57 58 return Tree(pq.pop()) 59 60def build_huffman_dictionary(node: TreeNode, bit_string: str="") -> dict: 61 """ 62 Given a TreeNode, build a Huffman Dictionary. 63 64 Args: 65 node (TreeNode): The Huffman Node. 66 bit_string (str): The bit string. 67 68 Returns: 69 A Huffman Dictionary. 70 """ 71 d = {} 72 if node.left is None and node.right is None: 73 return {node.value: bit_string} 74 75 d.update(build_huffman_dictionary(node.left, bit_string + '0')) 76 d.update(build_huffman_dictionary(node.right, bit_string + '1')) 77 78 return d 79 80def huffman_encode(st: str, hd: dict) -> str: 81 """ 82 Encode the string using the Huffman Dictionary. 83 84 Args: 85 st (str): The string to encode. 86 hd (dict): The Huffman Dictionary. 87 88 Returns: 89 The encoded string. 90 """ 91 s = "" 92 for c in st: 93 s += hd[c] 94 return s 95 96def huffman_decode(encoded_data: str, tree: Tree) -> str: 97 """ 98 Decode the encoded data using the Huffman Tree. 99 100 Args: 101 encoded_data (str): The encoded data. 102 tree (Tree): The Huffman Tree. 103 104 Returns: 105 The decoded data. 106 """ 107 node = tree.root 108 s = "" 109 for bit in encoded_data: 110 if int(bit) == 0: 111 node = node.left 112 else: 113 node = node.right 114 115 if node.left is None and node.right is None: 116 s += node.value 117 node = tree.root 118 return s 119 120def bitstring_to_bytes(s: str) -> bytes: 121 """ 122 Convert a bitstring to bytes. 123 124 Args: 125 s (str): The bitstring. 126 127 Returns: 128 Bitstring converted to bytes. 129 """ 130 return bytes(int(s[i : i + 8], 2) for i in range(0, len(s), 8)) 131 132def bytes_to_bitstring(ba: bytes, bitlength: int=8) -> str: 133 """ 134 Convert bytes to bitstring. 135 136 Args: 137 ba (bytes): The bytes to convert. 138 bitlength (int): The bit length. 139 140 Returns: 141 The bytes converted to bitstring. 142 """ 143 if not ba: 144 return "" 145 146 s = "" 147 for b in ba[:-1]: 148 byte = f"{b:08b}" 149 s += byte 150 151 byte = f"{ba[-1]:b}".zfill(bitlength) 152 s += byte 153 154 return s
6def character_frequency(s: str) -> dict: 7 """ 8 Takes a string a returns a dictionary of character frequencies. 9 10 Args: 11 s (str): The string to analyze. 12 13 Returns: 14 Dictionary containing character frequency. 15 """ 16 d = {} 17 for c in s: 18 if c in d: 19 d[c] += 1 20 else: 21 d[c] = 1 22 return d
Takes a string a returns a dictionary of character frequencies.
Args: s (str): The string to analyze.
Returns: Dictionary containing character frequency.
24def build_frequency_table(s: str) -> PriorityQueue: 25 """ 26 Accepts a string to encode and returns a heap of the characters. 27 28 Args: 29 s (str): The string to encode. 30 31 Returns: 32 A priority queue of the characters based on frequencies. 33 """ 34 frequency_dictionary = character_frequency(s) 35 36 pq = PriorityQueue() 37 38 for char, count in frequency_dictionary.items(): 39 pq.push(count, TreeNode(char))#, None, None)) 40 41 return pq
Accepts a string to encode and returns a heap of the characters.
Args: s (str): The string to encode.
Returns: A priority queue of the characters based on frequencies.
43def build_huffman_tree(pq: PriorityQueue) -> Tree: 44 """ 45 Accepts a priority queue and returns a Huffman Tree. 46 47 Args: 48 pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters based on frequencies. 49 50 Returns: 51 A Huffman Tree. 52 """ 53 while len(pq) > 1: 54 priority1, node1 = pq.pop_pair() 55 priority2, node2 = pq.pop_pair() 56 node = TreeNode(node1.value + node2.value, node1, node2) 57 pq.push(priority1 + priority2, node) 58 59 return Tree(pq.pop())
Accepts a priority queue and returns a Huffman Tree.
Args: pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters based on frequencies.
Returns: A Huffman Tree.
61def build_huffman_dictionary(node: TreeNode, bit_string: str="") -> dict: 62 """ 63 Given a TreeNode, build a Huffman Dictionary. 64 65 Args: 66 node (TreeNode): The Huffman Node. 67 bit_string (str): The bit string. 68 69 Returns: 70 A Huffman Dictionary. 71 """ 72 d = {} 73 if node.left is None and node.right is None: 74 return {node.value: bit_string} 75 76 d.update(build_huffman_dictionary(node.left, bit_string + '0')) 77 d.update(build_huffman_dictionary(node.right, bit_string + '1')) 78 79 return d
Given a TreeNode, build a Huffman Dictionary.
Args: node (TreeNode): The Huffman Node. bit_string (str): The bit string.
Returns: A Huffman Dictionary.
81def huffman_encode(st: str, hd: dict) -> str: 82 """ 83 Encode the string using the Huffman Dictionary. 84 85 Args: 86 st (str): The string to encode. 87 hd (dict): The Huffman Dictionary. 88 89 Returns: 90 The encoded string. 91 """ 92 s = "" 93 for c in st: 94 s += hd[c] 95 return s
Encode the string using the Huffman Dictionary.
Args: st (str): The string to encode. hd (dict): The Huffman Dictionary.
Returns: The encoded string.
97def huffman_decode(encoded_data: str, tree: Tree) -> str: 98 """ 99 Decode the encoded data using the Huffman Tree. 100 101 Args: 102 encoded_data (str): The encoded data. 103 tree (Tree): The Huffman Tree. 104 105 Returns: 106 The decoded data. 107 """ 108 node = tree.root 109 s = "" 110 for bit in encoded_data: 111 if int(bit) == 0: 112 node = node.left 113 else: 114 node = node.right 115 116 if node.left is None and node.right is None: 117 s += node.value 118 node = tree.root 119 return s
Decode the encoded data using the Huffman Tree.
Args: encoded_data (str): The encoded data. tree (Tree): The Huffman Tree.
Returns: The decoded data.
121def bitstring_to_bytes(s: str) -> bytes: 122 """ 123 Convert a bitstring to bytes. 124 125 Args: 126 s (str): The bitstring. 127 128 Returns: 129 Bitstring converted to bytes. 130 """ 131 return bytes(int(s[i : i + 8], 2) for i in range(0, len(s), 8))
Convert a bitstring to bytes.
Args: s (str): The bitstring.
Returns: Bitstring converted to bytes.
133def bytes_to_bitstring(ba: bytes, bitlength: int=8) -> str: 134 """ 135 Convert bytes to bitstring. 136 137 Args: 138 ba (bytes): The bytes to convert. 139 bitlength (int): The bit length. 140 141 Returns: 142 The bytes converted to bitstring. 143 """ 144 if not ba: 145 return "" 146 147 s = "" 148 for b in ba[:-1]: 149 byte = f"{b:08b}" 150 s += byte 151 152 byte = f"{ba[-1]:b}".zfill(bitlength) 153 s += byte 154 155 return s
Convert bytes to bitstring.
Args: ba (bytes): The bytes to convert. bitlength (int): The bit length.
Returns: The bytes converted to bitstring.