1 ;;;; a simple huffman encoder/decoder, used to compress unicode
4 ;;;; This software is part of the SBCL system. See the README file for
7 ;;;; This software is derived from the CMU CL system, which was
8 ;;;; written at Carnegie Mellon University and released into the
9 ;;;; public domain. The software is in the public domain and is
10 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11 ;;;; files for more information.
13 (in-package "SB!IMPL")
15 (defstruct (huffman-node (:constructor make-huffman-node (key weight)))
18 (defstruct (huffman-pair
19 (:include huffman-node)
20 (:constructor make-huffman-pair
22 (key (concatenate 'string
23 (huffman-node-key left)
24 (huffman-node-key right)))
25 (weight (+ (huffman-node-weight left)
26 (huffman-node-weight right))))))
29 (defun huffman-weights (corpus)
30 (let ((weight-table (make-hash-table :test #'equal)))
31 (loop for string in corpus
32 do (loop for char across string
33 do (incf (gethash char weight-table 0))))
35 (maphash (lambda (char weight)
36 (push (make-huffman-node (string char) weight) alist))
40 (defun make-huffman-tree (corpus)
41 (labels ((merge-table (table)
42 (setf table (sort table #'< :key #'huffman-node-weight))
43 (push (make-huffman-pair (pop table) (pop table))
49 (if (huffman-pair-p tree)
50 (list (huffman-node-key tree)
51 (finish-tree (huffman-pair-left tree))
52 (finish-tree (huffman-pair-right tree)))
53 (huffman-node-key tree))))
54 (finish-tree (merge-table (huffman-weights corpus)))))
56 (defun huffman-decode (code tree)
57 (let ((original code))
59 (let* ((bits (integer-length code))
60 (bit (ldb (byte 1 (- bits 2)) code)))
61 (setf code (dpb 1 (byte 1 (- bits 2))
62 (ldb (byte (- bits 1) 0) code)))
65 (destructuring-bind (key left right) branch
66 (declare (ignore key))
72 (error "Invalid Huffman-code: ~S" original))
73 (let ((next (choose branch)))
77 (concatenate 'string next (decode tree)))
82 (defun huffman-match (char node)
84 (find char (the string (car node)) :test #'equal)
85 (eql char (character node))))
87 (defun huffman-encode (string tree)
89 (labels ((encode (bit char tree)
91 (setf code (+ (ash code 1) bit)))
93 (destructuring-bind (key left right) tree
94 (declare (ignore key))
95 (cond ((huffman-match char left)
97 ((huffman-match char right)
98 (encode 1 char right))
101 (return-from huffman-encode nil))))
102 (unless (huffman-match char tree)
103 (error "Error encoding ~S (bad tree)." char)))))
104 (loop for char across string
105 do (encode nil char tree))