From: Nathan Froyd Date: Thu, 30 Jun 2005 20:56:33 +0000 (+0000) Subject: 0.9.2.7: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=69d60b456b07a0256f08df0d02484f361ce5737c;p=sbcl.git 0.9.2.7: Enable slightly more efficient operations with 32-bit constants on PPC. This reduces our instruction count in the inner loop of SB-MD5 by nearly 10% and brings us closer to the code that GCC generates. --- diff --git a/src/compiler/ppc/arith.lisp b/src/compiler/ppc/arith.lisp index 28d4acb..09a4f82 100644 --- a/src/compiler/ppc/arith.lisp +++ b/src/compiler/ppc/arith.lisp @@ -89,6 +89,15 @@ (:result-types tagged-num) (:note "inline fixnum arithmetic")) +(define-vop (fast-fixnum-binop30-c fast-safe-arith-op) + (:args (x :target r :scs (any-reg zero))) + (:info y) + (:arg-types tagged-num + (:constant (and (signed-byte 30) (not (integer 0 0))))) + (:results (r :scs (any-reg))) + (:result-types tagged-num) + (:note "inline fixnum arithmetic")) + (define-vop (fast-fixnum-logop-c fast-safe-arith-op) (:args (x :target r :scs (any-reg zero))) (:info y) @@ -98,6 +107,15 @@ (:result-types tagged-num) (:note "inline fixnum logical op")) +(define-vop (fast-fixnum-logop30-c fast-safe-arith-op) + (:args (x :target r :scs (any-reg zero))) + (:info y) + (:arg-types tagged-num + (:constant (and (unsigned-byte 16) (not (integer 0 0))))) + (:results (r :scs (any-reg))) + (:result-types tagged-num) + (:note "inline fixnum logical op")) + (define-vop (fast-unsigned-binop-c fast-safe-arith-op) (:args (x :target r :scs (unsigned-reg zero))) (:info y) @@ -107,6 +125,15 @@ (:result-types unsigned-num) (:note "inline (unsigned-byte 32) arithmetic")) +(define-vop (fast-unsigned-binop32-c fast-safe-arith-op) + (:args (x :target r :scs (unsigned-reg zero))) + (:info y) + (:arg-types unsigned-num + (:constant (and (unsigned-byte 32) (not (integer 0 0))))) + (:results (r :scs (unsigned-reg))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic")) + (define-vop (fast-unsigned-logop-c fast-safe-arith-op) (:args (x :target r :scs (unsigned-reg zero))) (:info y) @@ -116,29 +143,40 @@ (:result-types unsigned-num) (:note "inline (unsigned-byte 32) logical op")) -(define-vop (fast-signed-binop-c fast-safe-arith-op) - (:args (x :target r :scs (signed-reg zero))) +(define-vop (fast-unsigned-logop32-c fast-safe-arith-op) + (:args (x :target r :scs (unsigned-reg zero))) (:info y) - (:arg-types signed-num + (:arg-types unsigned-num + (:constant (and (unsigned-byte 32) (not (integer 0 0))))) + (:results (r :scs (unsigned-reg))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) logical op")) + + + +(define-vop (fast-unsigned-binop-c fast-safe-arith-op) + (:args (x :target r :scs (unsigned-reg zero))) + (:info y) + (:arg-types unsigned-num (:constant (and (signed-byte 16) (not (integer 0 0))))) - (:results (r :scs (signed-reg))) - (:result-types signed-num) - (:note "inline (signed-byte 32) arithmetic")) + (:results (r :scs (unsigned-reg))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic")) -(define-vop (fast-signed-logop-c fast-safe-arith-op) - (:args (x :target r :scs (signed-reg zero))) +(define-vop (fast-unsigned-logop-c fast-safe-arith-op) + (:args (x :target r :scs (unsigned-reg zero))) (:info y) - (:arg-types signed-num + (:arg-types unsigned-num (:constant (and (unsigned-byte 16) (not (integer 0 0))))) - (:results (r :scs (signed-reg))) - (:result-types signed-num) - (:note "inline (signed-byte 32) arithmetic")) + (:results (r :scs (unsigned-reg))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) logical op")) (eval-when (:compile-toplevel :load-toplevel :execute) -(defmacro define-var-binop (translate untagged-penalty op - &optional arg-swap restore-fixnum-mask) +(defmacro !define-var-binop (translate untagged-penalty op + &optional arg-swap restore-fixnum-mask) `(progn (define-vop (,(symbolicate "FAST-" translate "/FIXNUM=>FIXNUM") fast-fixnum-binop) @@ -168,65 +206,207 @@ `(inst ,op r y x) `(inst ,op r x y)))))) - -(defmacro define-const-binop (translate untagged-penalty op) +;;; FIXME: the code has really only been checked for adds; we could do +;;; subtracts, too, but my brain is not up to the task of figuring out +;;; signs and borrows. +(defmacro !define-const-binop (translate untagged-penalty op &optional (shifted-op nil)) `(progn - (define-vop (,(symbolicate 'fast- translate '-c/fixnum=>fixnum) - fast-fixnum-binop-c) + ,(if shifted-op + 'fast-fixnum-binop30-c + 'fast-fixnum-binop-c)) (:translate ,translate) + ,@(when shifted-op + `((:temporary (:sc any-reg :target r) temp))) (:generator 1 - (inst ,op r x (fixnumize y)))) + ,(if shifted-op + `(let* ((y (fixnumize y)) + (high-half (ldb (byte 16 16) y)) + (low-half (ldb (byte 16 0) y))) + ;; Compare %LR in insts.lisp. + (cond + ((and (logbitp 15 low-half) (= high-half #xffff)) + ;; Let sign-extension do the work for us, but make sure + ;; to turn LOW-HALF into a signed integer. + (inst ,op r x (dpb low-half (byte 16 0) -1))) + ((and (not (logbitp 15 low-half)) (zerop high-half)) + (inst ,op r x low-half)) + ((zerop low-half) + (inst ,shifted-op r x (if (logbitp 15 high-half) + (dpb high-half (byte 16 0) -1) + high-half))) + (t + ;; Check to see whether compensating for the sign bit + ;; of LOW-HALF is necessary. + (let ((high-half (let ((top (if (logbitp 15 low-half) + (ldb (byte 16 0) + (1+ high-half)) + high-half))) + (if (logbitp 15 top) + (dpb top (byte 16 0) -1) + top)))) + (inst ,shifted-op temp x high-half) + (inst ,op r temp low-half))))) + `(inst ,op r x (fixnumize y))))) (define-vop (,(symbolicate 'fast- translate '-c/signed=>signed) - fast-signed-binop-c) + ,(if shifted-op + 'fast-signed-binop32-c + 'fast-signed-binop-c)) (:translate ,translate) + ,@(when shifted-op + `((:temporary (:sc non-descriptor-reg :target r) temp))) (:generator ,untagged-penalty - (inst ,op r x y))) + ,(if shifted-op + `(let ((high-half (ldb (byte 16 16) y)) + (low-half (ldb (byte 16 0) y))) + ;; Compare %LR in insts.lisp. + (cond + ((and (logbitp 15 low-half) (= high-half #xffff)) + ;; Let sign-extension do the work for us, but make sure + ;; to turn LOW-HALF into a signed integer. + (inst ,op r x (dpb low-half (byte 16 0) -1))) + ((and (not (logbitp 15 low-half)) (zerop high-half)) + (inst ,op r x low-half)) + ((zerop low-half) + (inst ,shifted-op r x (if (logbitp 15 high-half) + (dpb high-half (byte 16 0) -1) + high-half))) + (t + ;; Check to see whether compensating for the sign bit + ;; of LOW-HALF is necessary. + (let ((high-half (let ((top (if (logbitp 15 low-half) + (ldb (byte 16 0) + (1+ high-half)) + high-half))) + (if (logbitp 15 top) + (dpb top (byte 16 0) -1) + top)))) + (inst ,shifted-op temp x high-half) + (inst ,op r temp low-half))))) + `(inst ,op r x y)))) (define-vop (,(symbolicate 'fast- translate '-c/unsigned=>unsigned) - fast-unsigned-binop-c) + ,(if shifted-op + 'fast-unsigned-binop32-c + 'fast-unsigned-binop-c)) (:translate ,translate) + ,@(when shifted-op + `((:temporary (:sc non-descriptor-reg :target r) temp))) (:generator ,untagged-penalty - (inst ,op r x y))))) - -(defmacro define-const-logop (translate untagged-penalty op) + ,(if shifted-op + `(let ((high-half (ldb (byte 16 16) y)) + (low-half (ldb (byte 16 0) y))) + ;; Compare %LR in insts.lisp. + (cond + ((and (logbitp 15 low-half) (= high-half #xffff)) + ;; Let sign-extension do the work for us, but make sure + ;; to turn LOW-HALF into a signed integer. + (inst ,op r x (dpb low-half (byte 16 0) -1))) + ((and (not (logbitp 15 low-half)) (zerop high-half)) + (inst ,op r x low-half)) + ((zerop low-half) + (inst ,shifted-op r x (if (logbitp 15 high-half) + (dpb high-half (byte 16 0) -1) + high-half))) + (t + ;; Check to see whether compensating for the sign bit + ;; of LOW-HALF is necessary. + (let ((high-half (let ((top (if (logbitp 15 low-half) + (ldb (byte 16 0) + (1+ high-half)) + high-half))) + (if (logbitp 15 top) + (dpb top (byte 16 0) -1) + top)))) + (inst ,shifted-op temp x high-half) + (inst ,op r temp low-half))))) + `(inst ,op r x y)))))) + +;;; For logical operations, we don't have to worry about signed bit +;;; propagation from the lower half of a 32-bit operand. +(defmacro !define-const-logop (translate untagged-penalty op &optional (shifted-op nil)) `(progn - (define-vop (,(symbolicate 'fast- translate '-c/fixnum=>fixnum) - fast-fixnum-logop-c) + ,(if shifted-op + 'fast-fixnum-logop30-c + 'fast-fixnum-logop-c)) (:translate ,translate) + ,@(when shifted-op + `((:temporary (:sc any-reg :target r) temp))) (:generator 1 - (inst ,op r x (fixnumize y)))) + ,(if shifted-op + `(let* ((y (fixnumize y)) + (high-half (ldb (byte 16 16) y)) + (low-half (ldb (byte 16 0) y))) + (cond + ((zerop high-half) (inst ,op r x low-half)) + ((zerop low-half) (inst ,shifted-op r x high-half)) + (t + (inst ,shifted-op temp x high-half) + (inst ,op r temp low-half)))) + `(inst ,op r x (fixnumize y))))) (define-vop (,(symbolicate 'fast- translate '-c/signed=>signed) - fast-signed-logop-c) + ,(if shifted-op + 'fast-signed-logop32-c + 'fast-signed-logop-c)) (:translate ,translate) + ,@(when shifted-op + `((:temporary (:sc non-descriptor-reg :target r) temp))) (:generator ,untagged-penalty - (inst ,op r x y))) + ,(if shifted-op + `(let ((high-half (ldb (byte 16 16) y)) + (low-half (ldb (byte 16 0) y))) + (cond + ((zerop high-half) (inst ,op r x low-half)) + ((zerop low-half) (inst ,shifted-op r x high-half)) + (t + (inst ,shifted-op temp x high-half) + (inst ,op r temp low-half)))) + `(inst ,op r x y)))) (define-vop (,(symbolicate 'fast- translate '-c/unsigned=>unsigned) - fast-unsigned-logop-c) + ,(if shifted-op + 'fast-unsigned-logop32-c + 'fast-unsigned-logop-c)) (:translate ,translate) + ,@(when shifted-op + `((:temporary (:sc non-descriptor-reg :target r) temp))) (:generator ,untagged-penalty - (inst ,op r x y))))) + ,(if shifted-op + `(let ((high-half (ldb (byte 16 16) y)) + (low-half (ldb (byte 16 0) y))) + (cond + ((zerop high-half) (inst ,op r x low-half)) + ((zerop low-half) (inst ,shifted-op r x high-half)) + (t + (inst ,shifted-op temp x high-half) + (inst ,op r temp low-half)))) + `(inst ,op r x y)))))) ); eval-when -(define-var-binop + 4 add) -(define-var-binop - 4 sub) -(define-var-binop logand 2 and) -(define-var-binop logandc1 2 andc t) -(define-var-binop logandc2 2 andc) -(define-var-binop logior 2 or) -(define-var-binop logorc1 2 orc t t) -(define-var-binop logorc2 2 orc nil t) -(define-var-binop logxor 2 xor) -(define-var-binop logeqv 2 eqv nil t) -(define-var-binop lognand 2 nand nil t) -(define-var-binop lognor 2 nor nil t) - -(define-const-binop + 4 addi) -(define-const-binop - 4 subi) -(define-const-logop logand 2 andi.) -(define-const-logop logior 2 ori) -(define-const-logop logxor 2 xori) +(!define-var-binop + 4 add) +(!define-var-binop - 4 sub) +(!define-var-binop logand 2 and) +(!define-var-binop logandc1 2 andc t) +(!define-var-binop logandc2 2 andc) +(!define-var-binop logior 2 or) +(!define-var-binop logorc1 2 orc t t) +(!define-var-binop logorc2 2 orc nil t) +(!define-var-binop logxor 2 xor) +(!define-var-binop logeqv 2 eqv nil t) +(!define-var-binop lognand 2 nand nil t) +(!define-var-binop lognor 2 nor nil t) + +(!define-const-binop + 4 addi addis) +(!define-const-binop - 4 subi) +;;; Implementing a 32-bit immediate version of LOGAND wouldn't be any +;;; better than loading the 32-bit constant via LR and then performing +;;; an /AND/. So don't bother. (It would be better in some cases, such +;;; as when one half of the word is zeros--we save a register--but we +;;; would have specified one temporary register in the VOP, so we lose +;;; any possible advantage.) +(!define-const-logop logand 2 andi.) +(!define-const-logop logior 2 ori oris) +(!define-const-logop logxor 2 xori xoris) ;;; Special case fixnum + and - that trap on overflow. Useful when we diff --git a/version.lisp-expr b/version.lisp-expr index f539110..0f1b631 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.2.6" +"0.9.2.7"