X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86-64%2Farith.lisp;h=d0436d5d99b948b32fc01269d19f0ee562346d6e;hb=5cf3c4259d529e180d75d4d140f344e600d2b06b;hp=304590315fde2ecefede6d5e12f4404910b6dbef;hpb=0d871fd7a98fc4af92a8b942a1154761466ad8c9;p=sbcl.git diff --git a/src/compiler/x86-64/arith.lisp b/src/compiler/x86-64/arith.lisp index 3045903..d0436d5 100644 --- a/src/compiler/x86-64/arith.lisp +++ b/src/compiler/x86-64/arith.lisp @@ -1,4 +1,4 @@ -;;;; the VM definition of arithmetic VOPs for the x86 +;;;; the VM definition of arithmetic VOPs for the x86-64 ;;;; This software is part of the SBCL system. See the README file for ;;;; more information. @@ -46,13 +46,13 @@ (define-vop (fast-lognot/fixnum fixnum-unop) (:translate lognot) - (:generator 2 + (:generator 1 (move res x) (inst xor res (fixnumize -1)))) (define-vop (fast-lognot/signed signed-unop) (:translate lognot) - (:generator 1 + (:generator 2 (move res x) (inst not res))) @@ -400,12 +400,12 @@ (:args (x :scs (unsigned-reg) :target eax) (y :scs (unsigned-reg unsigned-stack))) (:arg-types unsigned-num unsigned-num) - (:temporary (:sc unsigned-reg :offset eax-offset :target result + (:temporary (:sc unsigned-reg :offset eax-offset :target r :from (:argument 0) :to :result) eax) (:temporary (:sc unsigned-reg :offset edx-offset :from :eval :to :result) edx) (:ignore edx) - (:results (result :scs (unsigned-reg))) + (:results (r :scs (unsigned-reg))) (:result-types unsigned-num) (:note "inline (unsigned-byte 64) arithmetic") (:vop-var vop) @@ -413,7 +413,7 @@ (:generator 6 (move eax x) (inst mul eax y) - (move result eax))) + (move r eax))) (define-vop (fast-truncate/fixnum=>fixnum fast-safe-arith-op) @@ -432,7 +432,7 @@ (:vop-var vop) (:save-p :compute-only) (:generator 31 - (let ((zero (generate-error-code vop division-by-zero-error x y))) + (let ((zero (generate-error-code vop 'division-by-zero-error x y))) (if (sc-is y any-reg) (inst test y y) ; smaller instruction (inst cmp 
y 0)) @@ -487,7 +487,7 @@ (:vop-var vop) (:save-p :compute-only) (:generator 33 - (let ((zero (generate-error-code vop division-by-zero-error x y))) + (let ((zero (generate-error-code vop 'division-by-zero-error x y))) (if (sc-is y unsigned-reg) (inst test y y) ; smaller instruction (inst cmp y 0)) @@ -538,7 +538,7 @@ (:vop-var vop) (:save-p :compute-only) (:generator 33 - (let ((zero (generate-error-code vop division-by-zero-error x y))) + (let ((zero (generate-error-code vop 'division-by-zero-error x y))) (if (sc-is y signed-reg) (inst test y y) ; smaller instruction (inst cmp y 0)) @@ -593,26 +593,27 @@ (:note "inline ASH") (:generator 2 (cond ((and (= amount 1) (not (location= number result))) - (inst lea result (make-ea :qword :index number :scale 2))) + (inst lea result (make-ea :qword :base number :index number))) ((and (= amount 2) (not (location= number result))) (inst lea result (make-ea :qword :index number :scale 4))) ((and (= amount 3) (not (location= number result))) (inst lea result (make-ea :qword :index number :scale 8))) (t (move result number) - (cond ((plusp amount) - ;; We don't have to worry about overflow because of the - ;; result type restriction. - (inst shl result amount)) - (t - ;; Since the shift instructions take the shift amount - ;; modulo 64 we must special case amounts of 64 and more. - ;; Because fixnums have only 61 bits, the result is 0 or - ;; -1 for all amounts of 60 or more, so use this as the - ;; limit instead. 
- (inst sar result (min (- n-word-bits n-fixnum-tag-bits 1) - (- amount))) - (inst and result (lognot fixnum-tag-mask)))))))) + (cond ((< -64 amount 64) + ;; this code is used both in ASH and ASH-SMOD61, so + ;; be careful + (if (plusp amount) + (inst shl result amount) + (progn + (inst sar result (- amount)) + (inst and result (lognot fixnum-tag-mask))))) + ((plusp amount) + (if (sc-is result any-reg) + (zeroize result) + (inst mov result 0))) + (t (inst sar result 63) + (inst and result (lognot fixnum-tag-mask)))))))) (define-vop (fast-ash-left/fixnum=>fixnum) (:translate ash) @@ -653,7 +654,7 @@ (:note "inline ASH") (:generator 3 (cond ((and (= amount 1) (not (location= number result))) - (inst lea result (make-ea :qword :index number :scale 2))) + (inst lea result (make-ea :qword :base number :index number))) ((and (= amount 2) (not (location= number result))) (inst lea result (make-ea :qword :index number :scale 4))) ((and (= amount 3) (not (location= number result))) @@ -680,7 +681,7 @@ (:note "inline ASH") (:generator 3 (cond ((and (= amount 1) (not (location= number result))) - (inst lea result (make-ea :qword :index number :scale 2))) + (inst lea result (make-ea :qword :base number :index number))) ((and (= amount 2) (not (location= number result))) (inst lea result (make-ea :qword :index number :scale 4))) ((and (= amount 3) (not (location= number result))) @@ -694,7 +695,7 @@ (inst shl result amount) (inst shr result (- amount)))) (t (if (sc-is result unsigned-reg) - (inst xor result result) + (zeroize result) (inst mov result 0)))))))) (define-vop (fast-ash-left/signed=>signed) @@ -786,7 +787,7 @@ (inst neg ecx) (inst cmp ecx 63) (inst jmp :be OKAY) - (inst xor result result) + (zeroize result) (inst jmp DONE) OKAY (inst shr result :cl) @@ -896,7 +897,7 @@ (inst or ecx ecx) (inst jmp :ns POSITIVE) (inst neg ecx) - (inst xor zero zero) + (zeroize zero) (inst shr result :cl) (inst cmp ecx 63) (inst cmov :nbe result zero) @@ -918,7 +919,9 @@ 
(:result-types unsigned-num) (:generator 28 (move res arg) - (inst cmp res 0) + (if (sc-is res unsigned-reg) + (inst test res res) + (inst cmp res 0)) (inst jmp :ge POS) (inst not res) POS @@ -927,7 +930,7 @@ (inst inc res) (inst jmp DONE) ZERO - (inst xor res res) + (zeroize res) DONE)) (define-vop (unsigned-byte-64-len) @@ -944,89 +947,63 @@ (inst inc res) (inst jmp DONE) ZERO - (inst xor res res) + (zeroize res) DONE)) - (define-vop (unsigned-byte-64-count) (:translate logcount) (:note "inline (unsigned-byte 64) logcount") (:policy :fast-safe) - (:args (arg :scs (unsigned-reg))) + (:args (arg :scs (unsigned-reg) :target result)) (:arg-types unsigned-num) (:results (result :scs (unsigned-reg))) (:result-types positive-fixnum) - (:temporary (:sc unsigned-reg :from (:argument 0)) temp) - (:temporary (:sc unsigned-reg :from (:argument 0)) t1) - (:generator 60 + (:temporary (:sc unsigned-reg) temp) + (:temporary (:sc unsigned-reg) mask) + (:generator 14 + ;; See the comments below for how the algorithm works. The tricks + ;; used can be found for example in AMD's software optimization + ;; guide or at "http://www.hackersdelight.org/HDcode/pop.cc" in the + ;; function "pop1", for 32-bit words. The extension to 64 bits is + ;; straightforward. + ;; Calculate 2-bit sums. Note that the value of a two-digit binary + ;; number is the sum of the right digit and twice the left digit. + ;; Thus we can calculate the sum of the two digits by shifting the + ;; left digit to the right position and doing a two-bit subtraction. + ;; This subtraction will never create a borrow and thus can be made + ;; on all 32 2-digit numbers at once. 
(move result arg) - (move t1 arg) - - (inst mov temp result) - (inst shr temp 1) - (inst and result #x55555555) ; note these masks will restrict the - (inst and temp #x55555555) ; count to the lower half of arg - (inst add result temp) - - (inst mov temp result) + (move temp arg) + (inst shr result 1) + (inst mov mask #x5555555555555555) + (inst and result mask) + (inst sub temp result) + ;; Calculate 4-bit sums by straightforward shift, mask and add. + ;; Note that we shift the source operand of the MOV and not its + ;; destination so that the SHR and the MOV can execute in the same + ;; clock cycle. + (inst mov result temp) (inst shr temp 2) - (inst and result #x33333333) - (inst and temp #x33333333) + (inst mov mask #x3333333333333333) + (inst and result mask) + (inst and temp mask) (inst add result temp) - + ;; Calculate 8-bit sums. Since each sum is at most 8, which fits + ;; into 4 bits, we can apply the mask after the addition, saving one + ;; instruction. (inst mov temp result) - (inst shr temp 4) - (inst and result #x0f0f0f0f) - (inst and temp #x0f0f0f0f) + (inst shr result 4) (inst add result temp) - - (inst mov temp result) - (inst shr temp 8) - (inst and result #x00ff00ff) - (inst and temp #x00ff00ff) - (inst add result temp) - - (inst mov temp result) - (inst shr temp 16) - (inst and result #x0000ffff) - (inst and temp #x0000ffff) - (inst add result temp) - - ;;; now do the upper half - (inst shr t1 32) - - (inst mov temp t1) - (inst shr temp 1) - (inst and t1 #x55555555) - (inst and temp #x55555555) - (inst add t1 temp) - - (inst mov temp t1) - (inst shr temp 2) - (inst and t1 #x33333333) - (inst and temp #x33333333) - (inst add t1 temp) - - (inst mov temp t1) - (inst shr temp 4) - (inst and t1 #x0f0f0f0f) - (inst and temp #x0f0f0f0f) - (inst add t1 temp) - - (inst mov temp t1) - (inst shr temp 8) - (inst and t1 #x00ff00ff) - (inst and temp #x00ff00ff) - (inst add t1 temp) - - (inst mov temp t1) - (inst shr temp 16) - (inst and t1 #x0000ffff) - (inst 
and temp #x0000ffff) - (inst add t1 temp) - (inst add result t1))) - - + (inst mov mask #x0f0f0f0f0f0f0f0f) + (inst and result mask) + ;; Add all 8 bytes at once by multiplying with #256r11111111. + ;; We need to calculate only the lower 8 bytes of the product. + ;; Of these the most significant byte contains the final result. + ;; Note that there can be no overflow from one byte to the next + ;; as the sum is at most 64 which needs only 7 bits. + (inst mov mask #x0101010101010101) + (inst imul result mask) + (inst shr result 56))) ;;;; binary conditional VOPs @@ -1233,27 +1210,68 @@ ;;;; Modular functions +(defmacro define-mod-binop ((name prototype) function) + `(define-vop (,name ,prototype) + (:args (x :target r :scs (unsigned-reg signed-reg) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is y unsigned-reg) + (sc-is y signed-reg)) + (or (sc-is r unsigned-stack) + (sc-is r signed-stack)) + (location= x r)))) + (y :scs (unsigned-reg signed-reg unsigned-stack signed-stack))) + (:arg-types untagged-num untagged-num) + (:results (r :scs (unsigned-reg signed-reg) :from (:argument 0) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is y unsigned-reg) + (sc-is y signed-reg)) + (or (sc-is r unsigned-stack) + (sc-is r signed-stack)) + (location= x r))))) + (:result-types unsigned-num) + (:translate ,function))) +(defmacro define-mod-binop-c ((name prototype) function) + `(define-vop (,name ,prototype) + (:args (x :target r :scs (unsigned-reg signed-reg) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is r unsigned-stack) + (sc-is r signed-stack)) + (location= x r))))) + (:info y) + (:arg-types untagged-num (:constant (or (unsigned-byte 31) (signed-byte 32)))) + (:results (r :scs (unsigned-reg signed-reg) :from (:argument 0) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is r unsigned-stack) + (sc-is r 
signed-stack)) + (location= x r))))) + (:result-types unsigned-num) + (:translate ,function))) + (macrolet ((def (name -c-p) (let ((fun64 (intern (format nil "~S-MOD64" name))) (vopu (intern (format nil "FAST-~S/UNSIGNED=>UNSIGNED" name))) (vopcu (intern (format nil "FAST-~S-C/UNSIGNED=>UNSIGNED" name))) (vopf (intern (format nil "FAST-~S/FIXNUM=>FIXNUM" name))) (vopcf (intern (format nil "FAST-~S-C/FIXNUM=>FIXNUM" name))) - (vop64u (intern (format nil "FAST-~S-MOD64/UNSIGNED=>UNSIGNED" name))) + (vop64u (intern (format nil "FAST-~S-MOD64/WORD=>UNSIGNED" name))) (vop64f (intern (format nil "FAST-~S-MOD64/FIXNUM=>FIXNUM" name))) - (vop64cu (intern (format nil "FAST-~S-MOD64-C/UNSIGNED=>UNSIGNED" name))) + (vop64cu (intern (format nil "FAST-~S-MOD64-C/WORD=>UNSIGNED" name))) (vop64cf (intern (format nil "FAST-~S-MOD64-C/FIXNUM=>FIXNUM" name))) (sfun61 (intern (format nil "~S-SMOD61" name))) (svop61f (intern (format nil "FAST-~S-SMOD61/FIXNUM=>FIXNUM" name))) (svop61cf (intern (format nil "FAST-~S-SMOD61-C/FIXNUM=>FIXNUM" name)))) `(progn - (define-modular-fun ,fun64 (x y) ,name :unsigned 64) - (define-modular-fun ,sfun61 (x y) ,name :signed 61) - (define-vop (,vop64u ,vopu) (:translate ,fun64)) + (define-modular-fun ,fun64 (x y) ,name :untagged nil 64) + (define-modular-fun ,sfun61 (x y) ,name :tagged t 61) + (define-mod-binop (,vop64u ,vopu) ,fun64) (define-vop (,vop64f ,vopf) (:translate ,fun64)) (define-vop (,svop61f ,vopf) (:translate ,sfun61)) ,@(when -c-p - `((define-vop (,vop64cu ,vopcu) (:translate ,fun64)) + `((define-mod-binop-c (,vop64cu ,vopcu) ,fun64) (define-vop (,svop61cf ,vopcf) (:translate ,sfun61)))))))) (def + t) (def - t) @@ -1291,19 +1309,19 @@ (signed-byte 61) (foldable flushable movable)) -(define-modular-fun-optimizer %lea ((base index scale disp) :unsigned :width width) +(define-modular-fun-optimizer %lea ((base index scale disp) :untagged nil :width width) (when (and (<= width 64) (constant-lvar-p scale) (constant-lvar-p disp)) - 
(cut-to-width base :unsigned width) - (cut-to-width index :unsigned width) + (cut-to-width base :untagged width nil) + (cut-to-width index :untagged width nil) 'sb!vm::%lea-mod64)) -(define-modular-fun-optimizer %lea ((base index scale disp) :signed :width width) +(define-modular-fun-optimizer %lea ((base index scale disp) :tagged t :width width) (when (and (<= width 61) (constant-lvar-p scale) (constant-lvar-p disp)) - (cut-to-width base :signed width) - (cut-to-width index :signed width) + (cut-to-width base :tagged width t) + (cut-to-width index :tagged width t) 'sb!vm::%lea-smod61)) #+sb-xc-host @@ -1337,7 +1355,7 @@ (:translate %lea-smod61)) ;;; logical operations -(define-modular-fun lognot-mod64 (x) lognot :unsigned 64) +(define-modular-fun lognot-mod64 (x) lognot :untagged nil 64) (define-vop (lognot-mod64/unsigned=>unsigned) (:translate lognot-mod64) (:args (x :scs (unsigned-reg unsigned-stack) :target r @@ -1355,20 +1373,6 @@ (move r x) (inst not r))) -(define-modular-fun logxor-mod64 (x y) logxor :unsigned 64) -(define-vop (fast-logxor-mod64/unsigned=>unsigned - fast-logxor/unsigned=>unsigned) - (:translate logxor-mod64)) -(define-vop (fast-logxor-mod64-c/unsigned=>unsigned - fast-logxor-c/unsigned=>unsigned) - (:translate logxor-mod64)) -(define-vop (fast-logxor-mod64/fixnum=>fixnum - fast-logxor/fixnum=>fixnum) - (:translate logxor-mod64)) -(define-vop (fast-logxor-mod64-c/fixnum=>fixnum - fast-logxor-c/fixnum=>fixnum) - (:translate logxor-mod64)) - (define-source-transform logeqv (&rest args) (if (oddp (length args)) `(logxor ,@args) @@ -1398,7 +1402,9 @@ (define-full-reffer bignum-ref * bignum-digits-offset other-pointer-lowtag (unsigned-reg) unsigned-num sb!bignum:%bignum-ref) - +(define-full-reffer+offset bignum--ref-with-offset * bignum-digits-offset + other-pointer-lowtag (unsigned-reg) unsigned-num + sb!bignum:%bignum-ref-with-offset) (define-full-setter bignum-set * bignum-digits-offset other-pointer-lowtag (unsigned-reg) unsigned-num 
sb!bignum:%bignum-set) @@ -1415,8 +1421,8 @@ ;;; For add and sub with carry the sc of carry argument is any-reg so -;;; the it may be passed as a fixnum or word and thus may be 0, 1, or -;;; 4. This is easy to deal with and may save a fixnum-word +;;; that it may be passed as a fixnum or word and thus may be 0, 1, or +;;; 8. This is easy to deal with and may save a fixnum-word ;;; conversion. (define-vop (add-w/carry) (:translate sb!bignum:%add-with-carry) @@ -1437,8 +1443,8 @@ (inst mov carry 0) (inst adc carry carry))) -;;; Note: the borrow is the oppostite of the x86 convention - 1 for no -;;; borrow and 0 for a borrow. +;;; Note: the borrow is 1 for no borrow and 0 for a borrow, the opposite +;;; of the x86-64 convention. (define-vop (sub-w/borrow) (:translate sb!bignum:%subtract-with-borrow) (:policy :fast-safe) @@ -1453,9 +1459,8 @@ (inst cmp c 1) ; Set the carry flag to 1 if c=0 else to 0 (move result a) (inst sbb result b) - (inst mov borrow 0) - (inst adc borrow borrow) - (inst xor borrow 1))) + (inst mov borrow 1) + (inst sbb borrow 0))) (define-vop (bignum-mult-and-add-3-arg) @@ -1589,11 +1594,25 @@ :load-if (not (and (sc-is result unsigned-stack) (location= digit result))))) (:result-types unsigned-num) - (:generator 1 + (:generator 2 (move result digit) (move ecx count) (inst sar result :cl))) +(define-vop (digit-ashr/c) + (:translate sb!bignum:%ashr) + (:policy :fast-safe) + (:args (digit :scs (unsigned-reg unsigned-stack) :target result)) + (:arg-types unsigned-num (:constant (integer 0 63))) + (:info count) + (:results (result :scs (unsigned-reg) :from (:argument 0) + :load-if (not (and (sc-is result unsigned-stack) + (location= digit result))))) + (:result-types unsigned-num) + (:generator 1 + (move result digit) + (inst sar result count))) + (define-vop (digit-lshr digit-ashr) (:translate sb!bignum:%digit-logical-shift-right) (:generator 1