X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Farith.lisp;h=2fe9abc70be6c982bdb8ed2a611ebaea95dc199a;hb=f3f677703e37f5a335b3be7fa64f7748ad969517;hp=25d7091516689865b545ec57491501053c7d4e92;hpb=0e5a40455a1a3cc16cc71ad0f0b063eb4f1f2c3f;p=sbcl.git diff --git a/src/compiler/x86/arith.lisp b/src/compiler/x86/arith.lisp index 25d7091..2fe9abc 100644 --- a/src/compiler/x86/arith.lisp +++ b/src/compiler/x86/arith.lisp @@ -404,12 +404,12 @@ (:args (x :scs (unsigned-reg) :target eax) (y :scs (unsigned-reg unsigned-stack))) (:arg-types unsigned-num unsigned-num) - (:temporary (:sc unsigned-reg :offset eax-offset :target result + (:temporary (:sc unsigned-reg :offset eax-offset :target r :from (:argument 0) :to :result) eax) (:temporary (:sc unsigned-reg :offset edx-offset :from :eval :to :result) edx) (:ignore edx) - (:results (result :scs (unsigned-reg))) + (:results (r :scs (unsigned-reg))) (:result-types unsigned-num) (:note "inline (unsigned-byte 32) arithmetic") (:vop-var vop) @@ -417,7 +417,7 @@ (:generator 6 (move eax x) (inst mul eax y) - (move result eax))) + (move r eax))) (define-vop (fast-truncate/fixnum=>fixnum fast-safe-arith-op) @@ -910,7 +910,6 @@ DONE)) -;;; Note: documentation for this function is wrong - rtfm (define-vop (signed-byte-32-len) (:translate integer-length) (:note "inline (signed-byte 32) integer-length") @@ -1164,39 +1163,6 @@ (move result prev) (inst shrd result next :cl))) -(define-source-transform 32bit-logical-not (x) - `(logand (lognot (the (unsigned-byte 32) ,x)) #.(1- (ash 1 32)))) - -(deftransform 32bit-logical-and ((x y)) - '(logand x y)) - -(define-source-transform 32bit-logical-nand (x y) - `(32bit-logical-not (32bit-logical-and ,x ,y))) - -(deftransform 32bit-logical-or ((x y)) - '(logior x y)) - -(define-source-transform 32bit-logical-nor (x y) - `(32bit-logical-not (32bit-logical-or ,x ,y))) - -(deftransform 32bit-logical-xor ((x y)) - '(logxor x y)) - -(define-source-transform 32bit-logical-eqv (x y) - `(32bit-logical-not (32bit-logical-xor ,x ,y))) - -(define-source-transform 32bit-logical-orc1 (x y) - `(32bit-logical-or (32bit-logical-not ,x) ,y)) - -(define-source-transform 32bit-logical-orc2 (x y) - `(32bit-logical-or ,x (32bit-logical-not ,y))) - -(define-source-transform 32bit-logical-andc1 (x y) - `(32bit-logical-and (32bit-logical-not ,x) ,y)) - -(define-source-transform 32bit-logical-andc2 (x y) - `(32bit-logical-and ,x (32bit-logical-not ,y))) - ;;; Only the lower 5 bits of the shift amount are significant. (define-vop (shift-towards-someplace) (:policy :fast-safe) @@ -1224,69 +1190,169 @@ (inst shl r :cl))) ;;;; Modular functions +(defmacro define-mod-binop ((name prototype) function) + `(define-vop (,name ,prototype) + (:args (x :target r :scs (unsigned-reg signed-reg) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is y unsigned-reg) + (sc-is y signed-reg)) + (or (sc-is r unsigned-stack) + (sc-is r signed-stack)) + (location= x r)))) + (y :scs (unsigned-reg signed-reg unsigned-stack signed-stack))) + (:arg-types untagged-num untagged-num) + (:results (r :scs (unsigned-reg signed-reg) :from (:argument 0) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is y unsigned-reg) + (sc-is y unsigned-reg)) + (or (sc-is r unsigned-stack) + (sc-is r unsigned-stack)) + (location= x r))))) + (:result-types unsigned-num) + (:translate ,function))) +(defmacro define-mod-binop-c ((name prototype) function) + `(define-vop (,name ,prototype) + (:args (x :target r :scs (unsigned-reg signed-reg) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is r unsigned-stack) + (sc-is r signed-stack)) + (location= x r))))) + (:info y) + (:arg-types untagged-num (:constant (or (unsigned-byte 32) (signed-byte 32)))) + (:results (r :scs (unsigned-reg signed-reg) :from (:argument 0) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is r unsigned-stack) + (sc-is r unsigned-stack)) + (location= x r))))) + (:result-types unsigned-num) + (:translate ,function))) + +(macrolet ((def (name -c-p) + (let ((fun32 (intern (format nil "~S-MOD32" name))) + (vopu (intern (format nil "FAST-~S/UNSIGNED=>UNSIGNED" name))) + (vopcu (intern (format nil "FAST-~S-C/UNSIGNED=>UNSIGNED" name))) + (vopf (intern (format nil "FAST-~S/FIXNUM=>FIXNUM" name))) + (vopcf (intern (format nil "FAST-~S-C/FIXNUM=>FIXNUM" name))) + (vop32u (intern (format nil "FAST-~S-MOD32/WORD=>UNSIGNED" name))) + (vop32f (intern (format nil "FAST-~S-MOD32/FIXNUM=>FIXNUM" name))) + (vop32cu (intern (format nil "FAST-~S-MOD32-C/WORD=>UNSIGNED" name))) + (vop32cf (intern (format nil "FAST-~S-MOD32-C/FIXNUM=>FIXNUM" name))) + (sfun30 (intern (format nil "~S-SMOD30" name))) + (svop30f (intern (format nil "FAST-~S-SMOD30/FIXNUM=>FIXNUM" name))) + (svop30cf (intern (format nil "FAST-~S-SMOD30-C/FIXNUM=>FIXNUM" name)))) + `(progn + (define-modular-fun ,fun32 (x y) ,name :unsigned 32) + (define-modular-fun ,sfun30 (x y) ,name :signed 30) + (define-mod-binop (,vop32u ,vopu) ,fun32) + (define-vop (,vop32f ,vopf) (:translate ,fun32)) + (define-vop (,svop30f ,vopf) (:translate ,sfun30)) + ,@(when -c-p + `((define-mod-binop-c (,vop32cu ,vopcu) ,fun32) + (define-vop (,svop30cf ,vopcf) (:translate ,sfun30)))))))) + (def + t) + (def - t) + ;; (no -C variant as x86 MUL instruction doesn't take an immediate) + (def * nil)) -(define-modular-fun +-mod32 (x y) + 32) -(define-vop (fast-+-mod32/unsigned=>unsigned fast-+/unsigned=>unsigned) - (:translate +-mod32)) -(define-vop (fast-+-mod32-c/unsigned=>unsigned fast-+-c/unsigned=>unsigned) - (:translate +-mod32)) -(define-modular-fun --mod32 (x y) - 32) -(define-vop (fast---mod32/unsigned=>unsigned fast--/unsigned=>unsigned) - (:translate --mod32)) -(define-vop (fast---mod32-c/unsigned=>unsigned fast---c/unsigned=>unsigned) - (:translate --mod32)) - -(define-modular-fun *-mod32 (x y) * 32) -(define-vop (fast-*-mod32/unsigned=>unsigned fast-*/unsigned=>unsigned) - (:translate *-mod32)) -;;; (no -C variant as x86 MUL instruction doesn't take an immediate) (define-vop (fast-ash-left-mod32-c/unsigned=>unsigned fast-ash-c/unsigned=>unsigned) (:translate ash-left-mod32)) +(define-vop (fast-ash-left-mod32/unsigned=>unsigned + fast-ash-left/unsigned=>unsigned)) +(deftransform ash-left-mod32 ((integer count) + ((unsigned-byte 32) (unsigned-byte 5))) + (when (sb!c::constant-lvar-p count) + (sb!c::give-up-ir1-transform)) + '(%primitive fast-ash-left-mod32/unsigned=>unsigned integer count)) + +(define-vop (fast-ash-left-smod30-c/fixnum=>fixnum + fast-ash-c/fixnum=>fixnum) + (:translate ash-left-smod30)) + +(define-vop (fast-ash-left-smod30/fixnum=>fixnum + fast-ash-left/fixnum=>fixnum)) +(deftransform ash-left-smod30 ((integer count) + ((signed-byte 30) (unsigned-byte 5))) + (when (sb!c::constant-lvar-p count) + (sb!c::give-up-ir1-transform)) + '(%primitive fast-ash-left-smod30/fixnum=>fixnum integer count)) + (in-package "SB!C") (defknown sb!vm::%lea-mod32 (integer integer (member 1 2 4 8) (signed-byte 32)) (unsigned-byte 32) (foldable flushable movable)) +(defknown sb!vm::%lea-smod30 (integer integer (member 1 2 4 8) (signed-byte 32)) + (signed-byte 30) + (foldable flushable movable)) -(define-modular-fun-optimizer %lea ((base index scale disp) :width width) +(define-modular-fun-optimizer %lea ((base index scale disp) :unsigned :width width) (when (and (<= width 32) (constant-lvar-p scale) (constant-lvar-p disp)) - (cut-to-width base width) - (cut-to-width index width) + (cut-to-width base :unsigned width) + (cut-to-width index :unsigned width) 'sb!vm::%lea-mod32)) +(define-modular-fun-optimizer %lea ((base index scale disp) :signed :width width) + (when (and (<= width 30) + (constant-lvar-p scale) + (constant-lvar-p disp)) + (cut-to-width base :signed width) + (cut-to-width index :signed width) + 'sb!vm::%lea-smod30)) #+sb-xc-host -(defun sb!vm::%lea-mod32 (base index scale disp) - (ldb (byte 32 0) (%lea base index scale disp))) +(progn + (defun sb!vm::%lea-mod32 (base index scale disp) + (ldb (byte 32 0) (%lea base index scale disp))) + (defun sb!vm::%lea-smod30 (base index scale disp) + (mask-signed-field 30 (%lea base index scale disp)))) #-sb-xc-host -(defun sb!vm::%lea-mod32 (base index scale disp) - (let ((base (logand base #xffffffff)) - (index (logand index #xffffffff))) - ;; can't use modular version of %LEA, as we only have VOPs for - ;; constant SCALE and DISP. - (ldb (byte 32 0) (+ base (* index scale) disp)))) +(progn + (defun sb!vm::%lea-mod32 (base index scale disp) + (let ((base (logand base #xffffffff)) + (index (logand index #xffffffff))) + ;; can't use modular version of %LEA, as we only have VOPs for + ;; constant SCALE and DISP. + (ldb (byte 32 0) (+ base (* index scale) disp)))) + (defun sb!vm::%lea-smod30 (base index scale disp) + (let ((base (mask-signed-field 30 base)) + (index (mask-signed-field 30 index))) + ;; can't use modular version of %LEA, as we only have VOPs for + ;; constant SCALE and DISP. + (mask-signed-field 30 (+ base (* index scale) disp))))) (in-package "SB!VM") (define-vop (%lea-mod32/unsigned=>unsigned %lea/unsigned=>unsigned) (:translate %lea-mod32)) +(define-vop (%lea-smod30/fixnum=>fixnum + %lea/fixnum=>fixnum) + (:translate %lea-smod30)) ;;; logical operations -(define-modular-fun lognot-mod32 (x) lognot 32) -(define-vop (lognot-mod32/unsigned=>unsigned) +(define-modular-fun lognot-mod32 (x) lognot :unsigned 32) +(define-vop (lognot-mod32/word=>unsigned) (:translate lognot-mod32) - (:args (x :scs (unsigned-reg unsigned-stack) :target r - :load-if (not (and (sc-is x unsigned-stack) - (sc-is r unsigned-stack) + (:args (x :scs (unsigned-reg signed-reg unsigned-stack signed-stack) :target r + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is r unsigned-stack) + (sc-is r signed-stack)) (location= x r))))) (:arg-types unsigned-num) (:results (r :scs (unsigned-reg) - :load-if (not (and (sc-is x unsigned-stack) + :load-if (not (and (or (sc-is x unsigned-stack) + (sc-is x signed-stack)) + (or (sc-is r unsigned-stack) + (sc-is r signed-stack)) (sc-is r unsigned-stack) (location= x r))))) (:result-types unsigned-num) @@ -1295,12 +1361,18 @@ (move r x) (inst not r))) -(define-modular-fun logxor-mod32 (x y) logxor 32) -(define-vop (fast-logxor-mod32/unsigned=>unsigned - fast-logxor/unsigned=>unsigned) +(define-modular-fun logxor-mod32 (x y) logxor :unsigned 32) +(define-mod-binop (fast-logxor-mod32/word=>unsigned + fast-logxor/unsigned=>unsigned) + logxor-mod32) +(define-mod-binop-c (fast-logxor-mod32-c/word=>unsigned + fast-logxor-c/unsigned=>unsigned) + logxor-mod32) +(define-vop (fast-logxor-mod32/fixnum=>fixnum + fast-logxor/fixnum=>fixnum) (:translate logxor-mod32)) -(define-vop (fast-logxor-mod32-c/unsigned=>unsigned - fast-logxor-c/unsigned=>unsigned) +(define-vop (fast-logxor-mod32-c/fixnum=>fixnum + fast-logxor-c/fixnum=>fixnum) (:translate logxor-mod32)) (define-source-transform logeqv (&rest args) @@ -1459,7 +1531,7 @@ (move hi edx) (move lo eax))) -(define-vop (bignum-lognot lognot-mod32/unsigned=>unsigned) +(define-vop (bignum-lognot lognot-mod32/word=>unsigned) (:translate sb!bignum:%lognot)) (define-vop (fixnum-to-digit) @@ -1624,40 +1696,47 @@ (in-package "SB!C") +(defun mask-result (class width result) + (ecase class + (:unsigned + `(logand ,result ,(1- (ash 1 width)))) + (:signed + `(mask-signed-field ,width ,result)))) + ;;; This is essentially a straight implementation of the algorithm in ;;; "Strength Reduction of Multiplications by Integer Constants", ;;; Youfeng Wu, ACM SIGPLAN Notices, Vol. 30, No.2, February 1995. -(defun basic-decompose-multiplication (arg num n-bits condensed) +(defun basic-decompose-multiplication (class width arg num n-bits condensed) (case (aref condensed 0) (0 (let ((tmp (min 3 (aref condensed 1)))) (decf (aref condensed 1) tmp) - `(logand #xffffffff - (%lea ,arg - ,(decompose-multiplication - arg (ash (1- num) (- tmp)) (1- n-bits) (subseq condensed 1)) - ,(ash 1 tmp) 0)))) + (mask-result class width + `(%lea ,arg + ,(decompose-multiplication class width + arg (ash (1- num) (- tmp)) (1- n-bits) (subseq condensed 1)) + ,(ash 1 tmp) 0)))) ((1 2 3) (let ((r0 (aref condensed 0))) (incf (aref condensed 1) r0) - `(logand #xffffffff - (%lea ,(decompose-multiplication - arg (- num (ash 1 r0)) (1- n-bits) (subseq condensed 1)) - ,arg - ,(ash 1 r0) 0)))) + (mask-result class width + `(%lea ,(decompose-multiplication class width + arg (- num (ash 1 r0)) (1- n-bits) (subseq condensed 1)) + ,arg + ,(ash 1 r0) 0)))) (t (let ((r0 (aref condensed 0))) (setf (aref condensed 0) 0) - `(logand #xffffffff - (ash ,(decompose-multiplication - arg (ash num (- r0)) n-bits condensed) - ,r0)))))) + (mask-result class width + `(ash ,(decompose-multiplication class width + arg (ash num (- r0)) n-bits condensed) + ,r0)))))) -(defun decompose-multiplication (arg num n-bits condensed) +(defun decompose-multiplication (class width arg num n-bits condensed) (cond ((= n-bits 0) 0) ((= num 1) arg) ((= n-bits 1) - `(logand #xffffffff (ash ,arg ,(1- (integer-length num))))) + (mask-result class width `(ash ,arg ,(1- (integer-length num))))) ((let ((max 0) (end 0)) (loop for i from 2 to (length condensed) for j = (reduce #'+ (subseq condensed 0 i)) @@ -1673,18 +1752,19 @@ (let ((n2 (+ (ash 1 (1+ j)) (ash (ldb (byte (- 32 (1+ j)) (1+ j)) num) (1+ j)))) (n1 (1+ (ldb (byte (1+ j) 0) (lognot num))))) - `(logand #xffffffff - (- ,(optimize-multiply arg n2) ,(optimize-multiply arg n1)))))))) + (mask-result class width + `(- ,(optimize-multiply class width arg n2) + ,(optimize-multiply class width arg n1)))))))) ((dolist (i '(9 5 3)) (when (integerp (/ num i)) (when (< (logcount (/ num i)) (logcount num)) (let ((x (gensym))) - (return `(let ((,x ,(optimize-multiply arg (/ num i)))) - (logand #xffffffff - (%lea ,x ,x (1- ,i) 0))))))))) - (t (basic-decompose-multiplication arg num n-bits condensed)))) - -(defun optimize-multiply (arg x) + (return `(let ((,x ,(optimize-multiply class width arg (/ num i)))) + ,(mask-result class width + `(%lea ,x ,x (1- ,i) 0))))))))) + (t (basic-decompose-multiplication class width arg num n-bits condensed)))) + +(defun optimize-multiply (class width arg x) (let* ((n-bits (logcount x)) (condensed (make-array n-bits))) (let ((count 0) (bit 0)) @@ -1694,9 +1774,9 @@ (setf count 1) (incf bit)) (t (incf count))))) - (decompose-multiplication arg x n-bits condensed))) + (decompose-multiplication class width arg x n-bits condensed))) -(defun *-transformer (y) +(defun *-transformer (class width y) (cond ((= y (ash 1 (integer-length y))) ;; there's a generic transform for y = 2^k @@ -1710,21 +1790,33 @@ ;; FIXME: should make this more fine-grained. If nothing else, ;; there should probably be a cutoff of about 9 instructions on ;; pentium-class machines. - (t (optimize-multiply 'x y)))) + (t (optimize-multiply class width 'x y)))) (deftransform * ((x y) ((unsigned-byte 32) (constant-arg (unsigned-byte 32))) (unsigned-byte 32)) "recode as leas, shifts and adds" (let ((y (lvar-value y))) - (*-transformer y))) - + (*-transformer :unsigned 32 y))) (deftransform sb!vm::*-mod32 ((x y) ((unsigned-byte 32) (constant-arg (unsigned-byte 32))) (unsigned-byte 32)) "recode as leas, shifts and adds" (let ((y (lvar-value y))) - (*-transformer y))) + (*-transformer :unsigned 32 y))) + +(deftransform * ((x y) + ((signed-byte 30) (constant-arg (unsigned-byte 32))) + (signed-byte 30)) + "recode as leas, shifts and adds" + (let ((y (lvar-value y))) + (*-transformer :signed 30 y))) +(deftransform sb!vm::*-smod30 + ((x y) ((signed-byte 30) (constant-arg (unsigned-byte 32))) + (signed-byte 30)) + "recode as leas, shifts and adds" + (let ((y (lvar-value y))) + (*-transformer :signed 30 y))) ;;; FIXME: we should also be able to write an optimizer or two to ;;; convert (+ (* x 2) 17), (- (* x 9) 5) to a %LEA.