X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Farith.lisp;h=1380cdbe34e1afac26547c685b94c276d574e575;hb=51e63f301624e39febdd85b5feba19b7c980f307;hp=3910de909539d4a2b7cc05e8c45eee7594ae4c92;hpb=8b6626faeca6b41a0c7aacb5293c54d115cec154;p=sbcl.git diff --git a/src/compiler/x86/arith.lisp b/src/compiler/x86/arith.lisp index 3910de9..1380cdb 100644 --- a/src/compiler/x86/arith.lisp +++ b/src/compiler/x86/arith.lisp @@ -614,7 +614,7 @@ ;; at the low five bits of the result. (inst sar result (min 31 (- amount))) ;; Fixnum correction. - (inst and result #xfffffffc))))))) + (inst and result (lognot fixnum-tag-mask)))))))) (define-vop (fast-ash-left/fixnum=>fixnum) (:translate ash) @@ -953,43 +953,53 @@ (:translate logcount) (:note "inline (unsigned-byte 32) logcount") (:policy :fast-safe) - (:args (arg :scs (unsigned-reg))) + (:args (arg :scs (unsigned-reg) :target result)) (:arg-types unsigned-num) (:results (result :scs (unsigned-reg))) (:result-types positive-fixnum) - (:temporary (:sc unsigned-reg :from (:argument 0)) temp) - (:generator 30 + (:temporary (:sc unsigned-reg) temp) + (:generator 14 + ;; See the comments below for how the algorithm works. The tricks + ;; used can be found for example in AMD's software optimization + ;; guide or at "http://www.hackersdelight.org/HDcode/pop.cc" in the + ;; function "pop1". + ;; Calculate 2-bit sums. Note that the value of a two-digit binary + ;; number is the sum of the right digit and twice the left digit. + ;; Thus we can calculate the sum of the two digits by shifting the + ;; left digit to the right position and doing a two-bit subtraction. + ;; This subtraction will never create a borrow and thus can be made + ;; on all 16 2-digit numbers at once. (move result arg) - - (inst mov temp result) - (inst shr temp 1) + (move temp arg) + (inst shr result 1) (inst and result #x55555555) - (inst and temp #x55555555) - (inst add result temp) - - (inst mov temp result) + (inst sub temp result) + ;; Calculate 4-bit sums by straightforward shift, mask and add. + ;; Note that we shift the source operand of the MOV and not its + ;; destination so that the SHR and the MOV can execute in the same + ;; clock cycle. + (inst mov result temp) (inst shr temp 2) (inst and result #x33333333) (inst and temp #x33333333) (inst add result temp) - + ;; Calculate 8-bit sums. Since each sum is at most 8, which fits + ;; into 4 bits, we can apply the mask after the addition, saving one + ;; instruction. (inst mov temp result) - (inst shr temp 4) - (inst and result #x0f0f0f0f) - (inst and temp #x0f0f0f0f) + (inst shr result 4) (inst add result temp) - + (inst and result #x0f0f0f0f) + ;; Calculate the two 16-bit sums and the 32-bit sum. No masking is + ;; necessary inbetween since the final sum is at most 32 which fits + ;; into 6 bits. (inst mov temp result) - (inst shr temp 8) - (inst and result #x00ff00ff) - (inst and temp #x00ff00ff) + (inst shr result 8) (inst add result temp) - (inst mov temp result) - (inst shr temp 16) - (inst and result #x0000ffff) - (inst and temp #x0000ffff) - (inst add result temp))) + (inst shr result 16) + (inst add result temp) + (inst and result #xff))) ;;;; binary conditional VOPs @@ -1050,15 +1060,17 @@ ,(symbolicate "FAST-CONDITIONAL" suffix)) (:translate logtest) (:generator ,cost - (inst test x ,(if (eq suffix '-c/fixnum) - '(fixnumize y) - 'y)) + (emit-optimized-test-inst x + ,(if (eq suffix '-c/fixnum) + '(fixnumize y) + 'y)) (inst jmp (if not-p :e :ne) target))))))) (define-logtest-vops)) (defknown %logbitp (integer unsigned-byte) boolean - (movable foldable flushable)) + (movable foldable flushable always-translatable)) +;;; only for constant folding within the compiler (defun %logbitp (integer index) (logbitp index integer)) @@ -1071,6 +1083,8 @@ (inst jmp (if not-p :nc :c) target))) (define-vop (fast-logbitp/signed fast-conditional/signed) + (:args (x :scs (signed-reg signed-stack)) + (y :scs (signed-reg))) (:translate %logbitp) (:generator 6 (inst bt x y) @@ -1084,6 +1098,8 @@ (inst jmp (if not-p :nc :c) target))) (define-vop (fast-logbitp/unsigned fast-conditional/unsigned) + (:args (x :scs (unsigned-reg unsigned-stack)) + (y :scs (unsigned-reg))) (:translate %logbitp) (:generator 6 (inst bt x y) @@ -1460,7 +1476,9 @@ (define-full-reffer bignum-ref * bignum-digits-offset other-pointer-lowtag (unsigned-reg) unsigned-num sb!bignum:%bignum-ref) - +(define-full-reffer+offset bignum-ref-with-offset * + bignum-digits-offset other-pointer-lowtag + (unsigned-reg) unsigned-num sb!bignum:%bignum-ref-with-offset) (define-full-setter bignum-set * bignum-digits-offset other-pointer-lowtag (unsigned-reg) unsigned-num sb!bignum:%bignum-set) @@ -1650,11 +1668,25 @@ :load-if (not (and (sc-is result unsigned-stack) (location= digit result))))) (:result-types unsigned-num) - (:generator 1 + (:generator 2 (move result digit) (move ecx count) (inst sar result :cl))) +(define-vop (digit-ashr/c) + (:translate sb!bignum:%ashr) + (:policy :fast-safe) + (:args (digit :scs (unsigned-reg unsigned-stack) :target result)) + (:arg-types unsigned-num (:constant (integer 0 31))) + (:info count) + (:results (result :scs (unsigned-reg) :from (:argument 0) + :load-if (not (and (sc-is result unsigned-stack) + (location= digit result))))) + (:result-types unsigned-num) + (:generator 1 + (move result digit) + (inst sar result count))) + (define-vop (digit-lshr digit-ashr) (:translate sb!bignum:%digit-logical-shift-right) (:generator 1