dsa.huffman

Module to access functions for Huffman Compression.

  1""" Module to access functions for Huffman Compression. """
  2from dsa.tree import Tree, TreeNode
  3from dsa.heap import PriorityQueue
  4
  5def character_frequency(s: str) -> dict:
  6    """ 
  7    Takes a string a returns a dictionary of character frequencies.
  8
  9    Args:
 10        s (str): The string to analyze.
 11
 12    Returns:
 13        Dictionary containing character frequency.
 14    """
 15    d = {}
 16    for c in s:
 17        if c in d:
 18            d[c] += 1
 19        else:
 20            d[c] = 1
 21    return d
 22
 23def build_frequency_table(s: str) -> PriorityQueue:
 24    """ 
 25    Accepts a string to encode and returns a heap of the characters.
 26
 27    Args:
 28        s (str): The string to encode.
 29
 30    Returns:
 31        A priority queue of the characters based on frequencies.
 32    """
 33    frequency_dictionary = character_frequency(s)
 34    
 35    pq = PriorityQueue()
 36
 37    for char, count in frequency_dictionary.items():
 38        pq.push(count, TreeNode(char))#, None, None))
 39
 40    return pq
 41
 42def build_huffman_tree(pq: PriorityQueue) -> Tree:
 43    """ 
 44    Accepts a priority queue and returns a Huffman Tree.
 45
 46    Args:
 47        pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters based on frequencies. 
 48
 49    Returns:
 50        A Huffman Tree.
 51    """
 52    while len(pq) > 1:
 53        priority1, node1 = pq.pop_pair()
 54        priority2, node2 = pq.pop_pair()
 55        node = TreeNode(node1.value + node2.value, node1, node2)
 56        pq.push(priority1 + priority2, node)
 57
 58    return Tree(pq.pop())
 59
 60def build_huffman_dictionary(node: TreeNode, bit_string: str="") -> dict:
 61    """
 62    Given a TreeNode, build a Huffman Dictionary.
 63
 64    Args:
 65        node (TreeNode): The Huffman Node.
 66        bit_string (str): The bit string.
 67
 68    Returns:
 69        A Huffman Dictionary.
 70    """
 71    d = {}
 72    if node.left is None and node.right is None:
 73        return {node.value: bit_string}
 74
 75    d.update(build_huffman_dictionary(node.left, bit_string + '0'))
 76    d.update(build_huffman_dictionary(node.right, bit_string + '1'))
 77
 78    return d
 79
 80def huffman_encode(st: str, hd: dict) -> str:               
 81    """
 82    Encode the string using the Huffman Dictionary.
 83
 84    Args:
 85        st (str): The string to encode.
 86        hd (dict): The Huffman Dictionary.
 87
 88    Returns:
 89        The encoded string.
 90    """
 91    s = ""
 92    for c in st:
 93        s += hd[c]
 94    return s
 95
 96def huffman_decode(encoded_data: str, tree: Tree) -> str:
 97    """
 98    Decode the encoded data using the Huffman Tree.
 99    
100    Args:
101        encoded_data (str): The encoded data.
102        tree (Tree): The Huffman Tree.
103
104    Returns:
105        The decoded data.
106    """
107    node = tree.root
108    s = ""
109    for bit in encoded_data:
110        if int(bit) == 0:
111            node = node.left
112        else:
113            node = node.right
114
115        if node.left is None and node.right is None: 
116            s += node.value
117            node = tree.root
118    return s
119
120def bitstring_to_bytes(s: str) -> bytes:
121    """
122    Convert a bitstring to bytes.
123
124    Args:
125        s (str): The bitstring.
126
127    Returns:
128        Bitstring converted to bytes.
129    """
130    return bytes(int(s[i : i + 8], 2) for i in range(0, len(s), 8))
131
132def bytes_to_bitstring(ba: bytes, bitlength: int=8) -> str:
133    """
134    Convert bytes to bitstring.
135
136    Args:
137        ba (bytes): The bytes to convert.
138        bitlength (int): The bit length.
139    
140    Returns:
141        The bytes converted to bitstring.
142    """
143    if not ba:
144        return ""
145    
146    s = ""
147    for b in ba[:-1]:
148        byte = f"{b:08b}"
149        s += byte
150    
151    byte = f"{ba[-1]:b}".zfill(bitlength) 
152    s += byte
153
154    return s
def character_frequency(s: str) -> dict:
 6def character_frequency(s: str) -> dict:
 7    """ 
 8    Takes a string a returns a dictionary of character frequencies.
 9
10    Args:
11        s (str): The string to analyze.
12
13    Returns:
14        Dictionary containing character frequency.
15    """
16    d = {}
17    for c in s:
18        if c in d:
19            d[c] += 1
20        else:
21            d[c] = 1
22    return d

Takes a string a returns a dictionary of character frequencies.

Args: s (str): The string to analyze.

Returns: Dictionary containing character frequency.

def build_frequency_table(s: str) -> dsa.heap.PriorityQueue:
24def build_frequency_table(s: str) -> PriorityQueue:
25    """ 
26    Accepts a string to encode and returns a heap of the characters.
27
28    Args:
29        s (str): The string to encode.
30
31    Returns:
32        A priority queue of the characters based on frequencies.
33    """
34    frequency_dictionary = character_frequency(s)
35    
36    pq = PriorityQueue()
37
38    for char, count in frequency_dictionary.items():
39        pq.push(count, TreeNode(char))#, None, None))
40
41    return pq

Accepts a string to encode and returns a heap of the characters.

Args: s (str): The string to encode.

Returns: A priority queue of the characters based on frequencies.

def build_huffman_tree(pq: dsa.heap.PriorityQueue) -> dsa.tree.Tree:
43def build_huffman_tree(pq: PriorityQueue) -> Tree:
44    """ 
45    Accepts a priority queue and returns a Huffman Tree.
46
47    Args:
48        pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters based on frequencies. 
49
50    Returns:
51        A Huffman Tree.
52    """
53    while len(pq) > 1:
54        priority1, node1 = pq.pop_pair()
55        priority2, node2 = pq.pop_pair()
56        node = TreeNode(node1.value + node2.value, node1, node2)
57        pq.push(priority1 + priority2, node)
58
59    return Tree(pq.pop())

Accepts a priority queue and returns a Huffman Tree.

Args: pq (PriorityQueue): A PriorityQueue containing TreeNodes of characters based on frequencies.

Returns: A Huffman Tree.

def build_huffman_dictionary(node: dsa.tree.TreeNode, bit_string: str = '') -> dict:
61def build_huffman_dictionary(node: TreeNode, bit_string: str="") -> dict:
62    """
63    Given a TreeNode, build a Huffman Dictionary.
64
65    Args:
66        node (TreeNode): The Huffman Node.
67        bit_string (str): The bit string.
68
69    Returns:
70        A Huffman Dictionary.
71    """
72    d = {}
73    if node.left is None and node.right is None:
74        return {node.value: bit_string}
75
76    d.update(build_huffman_dictionary(node.left, bit_string + '0'))
77    d.update(build_huffman_dictionary(node.right, bit_string + '1'))
78
79    return d

Given a TreeNode, build a Huffman Dictionary.

Args: node (TreeNode): The Huffman Node. bit_string (str): The bit string.

Returns: A Huffman Dictionary.

def huffman_encode(st: str, hd: dict) -> str:
81def huffman_encode(st: str, hd: dict) -> str:               
82    """
83    Encode the string using the Huffman Dictionary.
84
85    Args:
86        st (str): The string to encode.
87        hd (dict): The Huffman Dictionary.
88
89    Returns:
90        The encoded string.
91    """
92    s = ""
93    for c in st:
94        s += hd[c]
95    return s

Encode the string using the Huffman Dictionary.

Args: st (str): The string to encode. hd (dict): The Huffman Dictionary.

Returns: The encoded string.

def huffman_decode(encoded_data: str, tree: dsa.tree.Tree) -> str:
 97def huffman_decode(encoded_data: str, tree: Tree) -> str:
 98    """
 99    Decode the encoded data using the Huffman Tree.
100    
101    Args:
102        encoded_data (str): The encoded data.
103        tree (Tree): The Huffman Tree.
104
105    Returns:
106        The decoded data.
107    """
108    node = tree.root
109    s = ""
110    for bit in encoded_data:
111        if int(bit) == 0:
112            node = node.left
113        else:
114            node = node.right
115
116        if node.left is None and node.right is None: 
117            s += node.value
118            node = tree.root
119    return s

Decode the encoded data using the Huffman Tree.

Args: encoded_data (str): The encoded data. tree (Tree): The Huffman Tree.

Returns: The decoded data.

def bitstring_to_bytes(s: str) -> bytes:
121def bitstring_to_bytes(s: str) -> bytes:
122    """
123    Convert a bitstring to bytes.
124
125    Args:
126        s (str): The bitstring.
127
128    Returns:
129        Bitstring converted to bytes.
130    """
131    return bytes(int(s[i : i + 8], 2) for i in range(0, len(s), 8))

Convert a bitstring to bytes.

Args: s (str): The bitstring.

Returns: Bitstring converted to bytes.

def bytes_to_bitstring(ba: bytes, bitlength: int = 8) -> str:
133def bytes_to_bitstring(ba: bytes, bitlength: int=8) -> str:
134    """
135    Convert bytes to bitstring.
136
137    Args:
138        ba (bytes): The bytes to convert.
139        bitlength (int): The bit length.
140    
141    Returns:
142        The bytes converted to bitstring.
143    """
144    if not ba:
145        return ""
146    
147    s = ""
148    for b in ba[:-1]:
149        byte = f"{b:08b}"
150        s += byte
151    
152    byte = f"{ba[-1]:b}".zfill(bitlength) 
153    s += byte
154
155    return s

Convert bytes to bitstring.

Args: ba (bytes): The bytes to convert. bitlength (int): The bit length.

Returns: The bytes converted to bitstring.