From: Nathan Froyd Date: Tue, 26 Jul 2005 18:51:43 +0000 (+0000) Subject: 0.9.3.2: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=d319b944d934f3efbb01a2a345c46bafd40857d0;p=sbcl.git 0.9.3.2: Micro-optimizations: * MAKE-FIXNUM on PPC now takes one instruction rather than two; * signed-num INTEGER-LENGTH VOP on PPC does not require a temporary register; * Added unsigned-num INTEGER-LENGTH VOP on PPC; * Musings on processor-specific micro-optimizations added to OPTIMIZATIONS. --- diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS index a702c7d..c79f922 100644 --- a/OPTIMIZATIONS +++ b/OPTIMIZATIONS @@ -231,4 +231,37 @@ it could be; if we placed raw slots before the header word, we would not need to do arithmetic at runtime to access them. (But beware: this would complicate handling of the interior pointer). -b. (Also note that raw slots are currently disabled on HPPA) \ No newline at end of file +b. (Also note that raw slots are currently disabled on HPPA) +-------------------------------------------------------------------------------- +#29 +Python is overly zealous when converting high-level CL functions, such +as MIN/MAX, LOGBITP, and LOGTEST, to low-level CL functions. Reducing +Python's aggressiveness would make it easier to effect changes such as + +x86-64: +* direct MIN/MAX on {SINGLE,DOUBLE}-FLOATs ({MIN,MAX}S{S,D}) + +x86{,-64}: +* direct LOGBITP on word-sized integers and fixnums (BT + JC) + +x86{,-64}/PPC: +* branch-free MIN/MAX on word-sized integers and fixnums +* efficient LOGTESTs on word-sized integers and fixnums (TEST / AND.) + +PPC: +* efficient LDB on word-sized integers and fixnums (RLWINM) + +etc., etc. + +The "easier" part claimed above would come about because the functions +would be available for :TRANSLATE through a VOP or similar, whereas with +the current architecture, one would have to pattern-match IR1. While +IR1 pattern-matching would be useful in other contexts, it seems better +here to attempt the direct :TRANSLATE route. 
+ +I (NJF) don't know how to implement such architecture-specific +optimizations whilst keeping the high->low transformations for other +architectures. Certainly adding #!+/- magic in compiler/*.lisp could be +done, but such a solution is somewhat inelegant. Moving the relevant +DEFTRANSFORMs to the architecture-specific compiler/* areas is also +possible, but that would duplicate quite a bit of code. diff --git a/src/compiler/ppc/arith.lisp b/src/compiler/ppc/arith.lisp index ee7a851..c633a13 100644 --- a/src/compiler/ppc/arith.lisp +++ b/src/compiler/ppc/arith.lisp @@ -614,19 +614,29 @@ (:policy :fast-safe) (:args (arg :scs (signed-reg))) (:arg-types signed-num) - (:results (res :scs (any-reg))) - (:result-types positive-fixnum) - (:temporary (:scs (non-descriptor-reg) :to (:argument 0)) shift) + (:results (res :scs (unsigned-reg) :from :load)) ; RES is written before the last read of ARG, so it must not share ARG's register + (:result-types unsigned-num) (:generator 6 ; (integer-length arg) = (- 32 (cntlz (if (>= arg 0) arg (lognot arg)))) (let ((nonneg (gen-label))) - (inst cntlzw. shift arg) + (inst cntlzw. res arg) (inst bne nonneg) - (inst not shift arg) - (inst cntlzw shift shift) + (inst not res arg) + (inst cntlzw res res) (emit-label nonneg) - (inst slwi shift shift 2) - (inst subfic res shift (fixnumize 32))))) + (inst subfic res res 32)))) + +(define-vop (unsigned-byte-32-len) + (:translate integer-length) + (:note "inline (unsigned-byte 32) integer-length") + (:policy :fast-safe) + (:args (arg :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:generator 4 + (inst cntlzw res arg) + (inst subfic res res 32))) (define-vop (unsigned-byte-32-count) (:translate logcount) diff --git a/src/compiler/ppc/insts.lisp b/src/compiler/ppc/insts.lisp index 9dceec4..b14927a 100644 --- a/src/compiler/ppc/insts.lisp +++ b/src/compiler/ppc/insts.lisp @@ -1828,6 +1828,12 @@ (define-instruction-macro extlwi. (ra rs n b) `(inst rlwinm. 
,ra ,rs ,b 0 (1- ,n))) + (define-instruction-macro extrwi (ra rs n b) + `(inst rlwinm ,ra ,rs (mod (+ ,b ,n) 32) (- 32 ,n) 31)) + + (define-instruction-macro extrwi. (ra rs n b) + `(inst rlwinm. ,ra ,rs (mod (+ ,b ,n) 32) (- 32 ,n) 31)) + (define-instruction-macro srwi (ra rs n) `(inst rlwinm ,ra ,rs (- 32 ,n) ,n 31)) diff --git a/src/compiler/ppc/system.lisp b/src/compiler/ppc/system.lisp index 6a437e1..d640ab9 100644 --- a/src/compiler/ppc/system.lisp +++ b/src/compiler/ppc/system.lisp @@ -128,8 +128,7 @@ ;; ;; Some code (the hash table code) depends on this returning a ;; positive number so make sure it does. - (inst slwi res ptr 3) - (inst srwi res res 1))) + (inst rlwinm res ptr n-fixnum-tag-bits 1 n-positive-fixnum-bits))) (define-vop (make-other-immediate-type) (:args (val :scs (any-reg descriptor-reg)) diff --git a/version.lisp-expr b/version.lisp-expr index 8b044a6..01c4735 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.3.1" +"0.9.3.2"