From abc6bed30fe634a0542d5a1424481447ba18386c Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Fri, 8 Jan 2010 02:21:15 +0000 Subject: [PATCH] 1.0.34.6: improvements to SB-ROTATE-BYTE on x86-64 - Generate ROL/ROR instructions for 32-bit rotates, rather than shifts, ands, and ors; - Generate ROL/ROR instructions for 64-bit rotates. While we're here, we might as well fix the FIXME about the ordering of DEFTRANSFORMS to ensure we do the right thing for identity rotates. --- NEWS | 4 ++++ contrib/sb-rotate-byte/compiler.lisp | 30 ++++++++++++++++++------- contrib/sb-rotate-byte/rotate-byte-tests.lisp | 22 ++++++++++++++++++ contrib/sb-rotate-byte/sb-rotate-byte.asd | 2 ++ version.lisp-expr | 2 +- 5 files changed, 51 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index fad4188..7dfb70e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,9 @@ ;;;; -*- coding: utf-8; fill-column: 78 -*- changes relative to sbcl-1.0.34: + * optimization: SB-ROTATE-BYTE:ROTATE-BYTE now generates more efficient + code for 32-bit and 64-bit rotations on x86-64. + * bug fix: Passing a rotation count of zero to SB-ROTATE-BYTE:ROTATE-BYTE + no longer causes a compiler error on x86 and ppc. * bug fix: GET-MACRO-CHARACTER bogusly computed its second return value always relative to *READTABLE* rather than the passed argument. diff --git a/contrib/sb-rotate-byte/compiler.lisp b/contrib/sb-rotate-byte/compiler.lisp index 2d0f195..58937d6 100644 --- a/contrib/sb-rotate-byte/compiler.lisp +++ b/contrib/sb-rotate-byte/compiler.lisp @@ -7,6 +7,10 @@ (defknown %unsigned-32-rotate-byte ((integer -31 31) (unsigned-byte 32)) (unsigned-byte 32) (foldable flushable)) +#+x86-64 +(defknown %unsigned-64-rotate-byte ((integer -63 63) (unsigned-byte 64)) + (unsigned-byte 64) + (foldable flushable)) (macrolet (;; see src/compiler/srctran.lisp (with-byte-specifier ((size-var pos-var spec) &body body) @@ -43,23 +47,16 @@ *universal-type*))) (deftransform %rotate-byte ((count size pos integer) - ((constant-arg (member 0)) * * *) *) - "fold identity operation" - 'integer) - -(deftransform %rotate-byte ((count size pos integer) ((integer -31 31) (constant-arg (member 32)) (constant-arg (member 0)) (unsigned-byte 32)) *) "inline 32-bit rotation" - ;; FIXME: What happens when, as here, the two type specifiers for - ;; COUNT overlap? Which gets to run first? '(%unsigned-32-rotate-byte count integer)) ;; Generic implementation for platforms that don't supply VOPs for 32-bit ;; rotate. -#-(or x86 ppc) +#-(or x86 x86-64 ppc) (deftransform %unsigned-32-rotate-byte ((.count. .integer.) ((integer -31 31) (unsigned-byte 32)) *) @@ -68,3 +65,20 @@ (ash .integer. .count.)) (logior (ldb (byte 32 0) (ash .integer. .count.)) (ash .integer. (- .count. 32))))) + +#+x86-64 +(deftransform %rotate-byte ((count size pos integer) + ((integer -63 63) + (constant-arg (member 64)) + (constant-arg (member 0)) + (unsigned-byte 64)) *) + "inline 64-bit rotation" + '(%unsigned-64-rotate-byte count integer)) + +;;; This transform needs to come after the others to ensure it gets +;;; first crack at a zero COUNT, since transforms are currently run +;;; latest-defined first. +(deftransform %rotate-byte ((count size pos integer) + ((constant-arg (member 0)) * * *) *) + "fold identity operation" + 'integer) diff --git a/contrib/sb-rotate-byte/rotate-byte-tests.lisp b/contrib/sb-rotate-byte/rotate-byte-tests.lisp index 252e06d..11f9b63 100644 --- a/contrib/sb-rotate-byte/rotate-byte-tests.lisp +++ b/contrib/sb-rotate-byte/rotate-byte-tests.lisp @@ -1,5 +1,8 @@ (in-package "SB-ROTATE-BYTE") +;;; Ensure we don't bug out with an identity rotation. +(assert (= (rotate-byte 0 (byte 32 0) 3) 3)) + (assert (= (rotate-byte 3 (byte 32 0) 3) 24)) (assert (= (rotate-byte 3 (byte 16 0) 3) 24)) (assert (= (rotate-byte 3 (byte 2 0) 3) 3)) @@ -65,3 +68,22 @@ (assert (= (ub32-reg-pressure 5 5) 10880)) (assert (= (ub32-reg-pressure 5 (ash 1 26)) 2147494368)) (assert (= (ub32-reg-pressure 5 (ash 1 27)) 10721)) + +(defun ub64/c (integer) + (declare (type (unsigned-byte 64) integer)) + (rotate-byte 6 (byte 64 0) integer)) + +(assert (= (ub64/c 5) 320)) +(assert (= (ub64/c 1) 64)) +(assert (= (ub64/c (ash 1 57)) (ash 1 63))) +(assert (= (ub64/c (ash 1 58)) 1)) + +(defun ub64 (count integer) + (declare (type (unsigned-byte 64) integer) + (type (integer -63 63) count)) + (rotate-byte count (byte 64 0) integer)) + +(assert (= (ub64 6 5) 320)) +(assert (= (ub64 6 1) 64)) +(assert (= (ub64 6 (ash 1 57)) (ash 1 63))) +(assert (= (ub64 6 (ash 1 58)) 1)) diff --git a/contrib/sb-rotate-byte/sb-rotate-byte.asd b/contrib/sb-rotate-byte/sb-rotate-byte.asd index 40ae7dd..8cf6d47 100644 --- a/contrib/sb-rotate-byte/sb-rotate-byte.asd +++ b/contrib/sb-rotate-byte/sb-rotate-byte.asd @@ -16,6 +16,8 @@ :components ((:file "x86-vm" :in-order-to ((compile-op (feature :x86)))) + (:file "x86-64-vm" + :in-order-to ((compile-op (feature :x86-64)))) (:file "ppc-vm" :in-order-to ((compile-op (feature :ppc))))) :pathname diff --git a/version.lisp-expr b/version.lisp-expr index e85b6aa..b5da4fe 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.34.5" +"1.0.34.6" -- 1.7.10.4