From 9e5e821bec4faad5eb84b3332d98aac497dabcf3 Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Fri, 4 Nov 2005 12:51:17 +0000 Subject: [PATCH] 0.9.6.17: Performance enhancements to euc-jp external format (NIIMI Satoshi sbcl-devel 2005-10-28) --- NEWS | 2 + src/code/external-formats/eucjp.lisp | 70 ++++++++++++++++++++++++---------- version.lisp-expr | 2 +- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index 3f90b77..09bcece 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,8 @@ changes in sbcl-0.9.7 relative to sbcl-0.9.6: merged with *DEFAULT-PATHNAME-DEFAULTS*. * enhancement: the x86-64 disassembler is much better at disassembling SSE instructions. (thanks to Lutz Euler) + * optimization: improved performance of EUC-JP external format. + (thanks to NIIMI Satoshi) * optimization: performance improvements to IO on file streams of :ELEMENT-TYPE CHARACTER * optimization: much faster memory allocation on x86-64 diff --git a/src/code/external-formats/eucjp.lisp b/src/code/external-formats/eucjp.lisp index e5e493a..584fa7f 100644 --- a/src/code/external-formats/eucjp.lisp +++ b/src/code/external-formats/eucjp.lisp @@ -1,7 +1,49 @@ (in-package "SB!IMPL") -(let ((ucs-to-eucjp-table (make-hash-table)) - (eucjp-to-ucs-table (make-hash-table))) +(let ((ucs-to-eucjp-table (make-array #xFFFF + :element-type '(unsigned-byte 16) + :initial-element #xFFFF)) + (eucjp-to-ucs-table (make-array #xFFFF + :element-type '(unsigned-byte 16) + :initial-element #xFFFF))) + (labels ((eucjp-to-internal (code) + (declare (optimize speed (safety 0)) + (type fixnum code)) + (if (<= #x8F0000 code #x8FFFFF) + (logand code #xFF7F) + code)) + (internal-to-eucjp (code) + (declare (optimize speed (safety 0)) + (type fixnum code)) + (if (= (logand code #x8080) #x8000) + (logior code #x8F8080) + code))) + (declare (inline eucjp-to-internal internal-to-eucjp)) + (defun ucs-to-eucjp (code) + (declare (optimize speed (safety 0)) + (type fixnum code)) + (if (<= 0 code (length ucs-to-eucjp-table)) + (let ((x (aref ucs-to-eucjp-table code))) + (unless (= x #xFFFF) + (internal-to-eucjp x))))) + (defun eucjp-to-ucs (code) + (declare (optimize speed (safety 0)) + (type fixnum code)) + (let ((code (eucjp-to-internal code))) + (if (<= 0 code (length eucjp-to-ucs-table)) + (let ((x (aref eucjp-to-ucs-table code))) + (unless (= x #xFFFF) + x))))) + (defun set-ucs-to-eucjp (ucs eucjp) + (let ((eucjp (eucjp-to-internal eucjp))) + (if (= (aref ucs-to-eucjp-table ucs) #xFFFF) + (setf (aref ucs-to-eucjp-table ucs) eucjp) + (error "duplicated ucs: ~X" ucs)))) + (defun set-eucjp-to-ucs (eucjp ucs) + (let ((eucjp (eucjp-to-internal eucjp))) + (if (= (aref eucjp-to-ucs-table eucjp) #xFFFF) + (setf (aref eucjp-to-ucs-table eucjp) ucs) + (error "duplicated eucjp: ~X" eucjp))))) (let ((ucs<->eucjp ; bi-directional table UCS <-> EUC-JP ;; based on eucJP-ascii in ;; @@ -13027,27 +13069,13 @@ (#xFFE4 . #x8FA2C3) (#xFFE5 . #xA1EF)))) (dotimes (i 128) - (setf (gethash i ucs-to-eucjp-table) i) - (setf (gethash i eucjp-to-ucs-table) i)) + (set-ucs-to-eucjp i i) + (set-eucjp-to-ucs i i)) (dolist (pair ucs<->eucjp) - (when (gethash (car pair) ucs-to-eucjp-table) - (error "duplicated ucs: ~X" (car pair))) - (when (gethash (cdr pair) eucjp-to-ucs-table) - (error "duplicated eucjp: ~X" (car pair))) - (setf (gethash (car pair) ucs-to-eucjp-table) (cdr pair)) - (setf (gethash (cdr pair) eucjp-to-ucs-table) (car pair))) + (set-ucs-to-eucjp (car pair) (cdr pair)) + (set-eucjp-to-ucs (cdr pair) (car pair))) (dolist (pair ucs->eucjp) - (when (gethash (car pair) ucs-to-eucjp-table) - (error "duplicated ucs: ~X" (car pair))) - (setf (gethash (car pair) ucs-to-eucjp-table) (cdr pair)))) - (defun ucs-to-eucjp (code) - (declare (optimize speed (safety 0)) - (type fixnum code)) - (gethash code ucs-to-eucjp-table)) - (defun eucjp-to-ucs (code) - (declare (optimize speed (safety 0)) - (type fixnum code)) - (gethash code eucjp-to-ucs-table))) + (set-ucs-to-eucjp (car pair) (cdr pair))))) ;;; for fd-stream.lisp (define-external-format/variable-width (:euc-jp :eucjp :|eucJP|) t diff --git a/version.lisp-expr b/version.lisp-expr index cdc60e4..29c67cb 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.6.16" +"0.9.6.17" -- 1.7.10.4