From a28e5039d2a5087ca3c04376fd423ec0cedb64ad Mon Sep 17 00:00:00 2001
From: Juho Snellman
Date: Fri, 26 Aug 2005 22:19:12 +0000
Subject: [PATCH] 0.9.4.8: Add a fallback implementation of
 SB-ROTATE-BYTE:%UNSIGNED-32-ROTATE-BYTE for platforms without the optimized
 VOPs, that isn't as pessimal as the generic %ROTATE-BYTE. About 70x speedup
 for SB-MD5 on x86-64.

---
 NEWS                                 |    2 ++
 contrib/sb-rotate-byte/compiler.lisp |   12 ++++++++++++
 version.lisp-expr                    |    2 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 39717a2..2f1eef1 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,8 @@ changes in sbcl-0.9.5 relative to sbcl-0.9.4:
   * bug fix: interrupts are disabled until startup is complete; no
     more sigsegvs when receiving a signal to soon
+  * optimization: Faster 32-bit SB-ROTATE-BYTE:ROTATE-BYTE on non-x86/ppc
+    platforms
   * threads
     ** bug fix: parent thread now can be gc'ed even with a live child
        thread
 
diff --git a/contrib/sb-rotate-byte/compiler.lisp b/contrib/sb-rotate-byte/compiler.lisp
index 2e7d5e1..2d0f195 100644
--- a/contrib/sb-rotate-byte/compiler.lisp
+++ b/contrib/sb-rotate-byte/compiler.lisp
@@ -56,3 +56,15 @@
   ;; FIXME: What happens when, as here, the two type specifiers for
   ;; COUNT overlap? Which gets to run first?
   '(%unsigned-32-rotate-byte count integer))
+
+;; Generic implementation for platforms that don't supply VOPs for 32-bit
+;; rotate.
+#-(or x86 ppc)
+(deftransform %unsigned-32-rotate-byte ((.count. .integer.)
+                                        ((integer -31 31)
+                                         (unsigned-byte 32)) *)
+  '(if (< .count. 0)
+       (logior (ldb (byte 32 0) (ash .integer. (+ .count. 32)))
+               (ash .integer. .count.))
+       (logior (ldb (byte 32 0) (ash .integer. .count.))
+               (ash .integer. (- .count. 32)))))
diff --git a/version.lisp-expr b/version.lisp-expr
index 6f0e405..716efaf 100644
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.4.8"
+"0.9.4.9"
-- 
1.7.10.4