From 3254e1b6fb33e4ff5be5f37ba4bbcc34ca151cf7 Mon Sep 17 00:00:00 2001 From: Paul Khuong Date: Sun, 14 Aug 2011 16:49:27 -0400 Subject: [PATCH] New function: SB!KERNEL:%MULTIPLY-HIGH * Does the same thing as only returning the first value of %MULTIPLY, only better on some platforms. * Implemented vas VOPs on x86, x86-64 and PPC. The PPC code sequence is fully untested, and merely looks correct. * VOPs for fixnum first argument are included, but will only be used when the result is forcibly marked as fixnum, e.g., with TRULY-THE. Questionnable, but I'd rather err on the side of straightforwardness rather than put even more pressure on representation selection. * Use it in the division-by-multiplication transform for unsigned TRUNCATE by constant. --- base-target-features.lisp-expr | 6 ++++++ make-config.sh | 5 +++-- package-data-list.lisp-expr | 1 + src/code/numbers.lisp | 9 +++++++++ src/compiler/fndb.lisp | 3 +++ src/compiler/ppc/arith.lisp | 28 ++++++++++++++++++++++++++++ src/compiler/srctran.lisp | 27 +++++++++++++++++---------- src/compiler/x86-64/arith.lisp | 36 ++++++++++++++++++++++++++++++++++++ src/compiler/x86/arith.lisp | 36 ++++++++++++++++++++++++++++++++++++ 9 files changed, 139 insertions(+), 12 deletions(-) diff --git a/base-target-features.lisp-expr b/base-target-features.lisp-expr index ca1e90c..2428237 100644 --- a/base-target-features.lisp-expr +++ b/base-target-features.lisp-expr @@ -309,6 +309,12 @@ ;; foreign code that uses a 32-bit off_t. ; :largefile + ;; Enabled automatically on platforms that have VOPs to compute the + ;; high half of a full word-by-word multiplication. When disabled, + ;; SB-KERNEL:%MULTIPLY-HIGH is implemented in terms of + ;; SB-BIGNUM:%MULTIPLY. + ; :multiply-high-vops + ;; ;; miscellaneous notes on other things which could have special significance ;; in the *FEATURES* list diff --git a/make-config.sh b/make-config.sh index 0101f23..d51ce21 100644 --- a/make-config.sh +++ b/make-config.sh @@ -303,7 +303,7 @@ if [ "$sbcl_arch" = "x86" ]; then printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf printf ' :stack-allocatable-lists :stack-allocatable-fixed-objects' >> $ltf printf ' :alien-callbacks :cycle-counter :inline-constants ' >> $ltf - printf ' :memory-barrier-vops' >> $ltf + printf ' :memory-barrier-vops :multiply-high-vops' >> $ltf case "$sbcl_os" in linux | freebsd | netbsd | openbsd | sunos | darwin | win32) printf ' :linkage-table' >> $ltf @@ -324,6 +324,7 @@ elif [ "$sbcl_arch" = "x86-64" ]; then printf ' :stack-allocatable-lists :stack-allocatable-fixed-objects' >> $ltf printf ' :alien-callbacks :cycle-counter :complex-float-vops' >> $ltf printf ' :float-eql-vops :inline-constants :memory-barrier-vops' >> $ltf + printf ' :multiply-high-vops' >> $ltf elif [ "$sbcl_arch" = "mips" ]; then printf ' :linkage-table' >> $ltf printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf @@ -338,7 +339,7 @@ elif [ "$sbcl_arch" = "mips" ]; then elif [ "$sbcl_arch" = "ppc" ]; then printf ' :gencgc :stack-allocatable-closures :stack-allocatable-lists' >> $ltf printf ' :linkage-table :raw-instance-init-vops :memory-barrier-vops' >> $ltf - printf ' :compare-and-swap-vops' >> $ltf + printf ' :compare-and-swap-vops :multiply-high-vops' >> $ltf if [ "$sbcl_os" = "linux" ]; then # Use a C program to detect which kind of glibc we're building on, # to bandage across the break in source compatibility between diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 0bf8ed8..fffc257 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1331,6 +1331,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%MEMBER-KEY-TEST-NOT" "%MEMBER-TEST" "%MEMBER-TEST-NOT" + "%MULTIPLY-HIGH" "%NEGATE" "%POW" "%OTHER-POINTER-WIDETAG" "%PUTHASH" diff --git a/src/code/numbers.lisp b/src/code/numbers.lisp index ecf558d..fe6f9d1 100644 --- a/src/code/numbers.lisp +++ b/src/code/numbers.lisp @@ -590,6 +590,15 @@ (foreach single-float double-float #!+long-float long-float)) (truncate-float (dispatch-type divisor)))))) +;; Only inline when no VOP exists +#!-multiply-high-vops (declaim (inline %multiply-high)) +(defun %multiply-high (x y) + (declare (type word x y)) + #!-multiply-high-vops + (values (sb!bignum:%multiply x y)) + #!+multiply-high-vops + (%multiply-high x y)) + ;;; Declare these guys inline to let them get optimized a little. ;;; ROUND and FROUND are not declared inline since they seem too ;;; obscure and too big to inline-expand by default. Also, this gives diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp index a735e43..891a39c 100644 --- a/src/compiler/fndb.lisp +++ b/src/compiler/fndb.lisp @@ -319,6 +319,9 @@ (real &optional real) (values integer real) (movable foldable flushable explicit-check)) +(defknown %multiply-high (word word) word + (movable foldable flushable)) + (defknown (%floor %ceiling) (real real) (values integer real) (movable foldable flushable explicit-check)) diff --git a/src/compiler/ppc/arith.lisp b/src/compiler/ppc/arith.lisp index 448d018..e5eabce 100644 --- a/src/compiler/ppc/arith.lisp +++ b/src/compiler/ppc/arith.lisp @@ -1154,6 +1154,34 @@ (inst mullw lo x y) (inst mulhwu hi x y))) +#!+multiply-high-vops +(define-vop (mulhi) + (:translate sb!kernel:%multiply-high) + (:policy :fast-safe) + (:args (x :scs (unsigned-reg)) + (y :scs (unsigned-reg))) + (:arg-types unsigned-num unsigned-num) + (:results (hi :scs (unsigned-reg))) + (:result-types unsigned-num) + (:generator 20 + (inst mulhwu hi x y))) + +#!+multiply-high-vops +(define-vop (mulhi/fx) + (:translate sb!kernel:%multiply-high) + (:policy :fast-safe) + (:args (x :scs (any-reg)) + (y :scs (unsigned-reg))) + (:arg-types positive-fixnum unsigned-num) + (:temporary (:sc non-descriptor-reg :from :eval :to :result) temp) + (:temporary (:sc non-descriptor-reg :from :eval :to :result) mask) + (:results (hi :scs (any-reg))) + (:result-types positive-fixnum) + (:generator 15 + (inst mulhwu temp x y) + (inst lr mask fixnum-tag-mask) + (inst andc hi temp mask))) + (define-vop (bignum-lognot lognot-mod32/unsigned=>unsigned) (:translate sb!bignum:%lognot)) diff --git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp index 44806ec..90ebeed 100644 --- a/src/compiler/srctran.lisp +++ b/src/compiler/srctran.lisp @@ -3336,16 +3336,23 @@ (multiple-value-setq (m shift2) (choose-multiplier (/ y (ash 1 shift1)) (- precision shift1)))) - (if (>= m n) - (flet ((word (x) - `(truly-the word ,x))) - `(let* ((num x) - (t1 (%multiply num ,(- m n)))) - (ash ,(word `(+ t1 (ash ,(word `(- num t1)) - -1))) - ,(- 1 shift2)))) - `(ash (%multiply (logandc2 x ,(1- (ash 1 shift1))) ,m) - ,(- (+ shift1 shift2)))))))) + (cond ((>= m n) + (flet ((word (x) + `(truly-the word ,x))) + `(let* ((num x) + (t1 (%multiply-high num ,(- m n)))) + (ash ,(word `(+ t1 (ash ,(word `(- num t1)) + -1))) + ,(- 1 shift2))))) + ((and (zerop shift1) (zerop shift2)) + (let ((max (truncate max-x y))) + ;; Explicit TRULY-THE needed to get the FIXNUM=>FIXNUM + ;; VOP. + `(truly-the (integer 0 ,max) + (%multiply-high x ,m)))) + (t + `(ash (%multiply-high (logandc2 x ,(1- (ash 1 shift1))) ,m) + ,(- (+ shift1 shift2))))))))) ;;; If the divisor is constant and both args are positive and fit in a ;;; machine word, replace the division by a multiplication and possibly diff --git a/src/compiler/x86-64/arith.lisp b/src/compiler/x86-64/arith.lisp index 02531a3..cd06c8e 100644 --- a/src/compiler/x86-64/arith.lisp +++ b/src/compiler/x86-64/arith.lisp @@ -1610,6 +1610,42 @@ constant shift greater than word length"))) (move hi edx) (move lo eax))) +#!+multiply-high-vops +(define-vop (mulhi) + (:translate sb!kernel:%multiply-high) + (:policy :fast-safe) + (:args (x :scs (unsigned-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)) + eax) + (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1) + :to (:result 0) :target hi) edx) + (:results (hi :scs (unsigned-reg))) + (:result-types unsigned-num) + (:generator 20 + (move eax x) + (inst mul eax y) + (move hi edx))) + +#!+multiply-high-vops +(define-vop (mulhi/fx) + (:translate sb!kernel:%multiply-high) + (:policy :fast-safe) + (:args (x :scs (any-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types positive-fixnum unsigned-num) + (:temporary (:sc any-reg :offset eax-offset :from (:argument 0)) eax) + (:temporary (:sc any-reg :offset edx-offset :from (:argument 1) + :to (:result 0) :target hi) edx) + (:results (hi :scs (any-reg))) + (:result-types positive-fixnum) + (:generator 15 + (move eax x) + (inst mul eax y) + (move hi edx) + (inst and hi (lognot fixnum-tag-mask)))) + (define-vop (bignum-lognot lognot-mod64/unsigned=>unsigned) (:translate sb!bignum:%lognot)) diff --git a/src/compiler/x86/arith.lisp b/src/compiler/x86/arith.lisp index 92a0967..6965fee 100644 --- a/src/compiler/x86/arith.lisp +++ b/src/compiler/x86/arith.lisp @@ -1574,6 +1574,42 @@ constant shift greater than word length"))) (move hi edx) (move lo eax))) +#!+multiply-high-vops +(define-vop (mulhi) + (:translate sb!kernel:%multiply-high) + (:policy :fast-safe) + (:args (x :scs (unsigned-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)) + eax) + (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1) + :to (:result 0) :target hi) edx) + (:results (hi :scs (unsigned-reg))) + (:result-types unsigned-num) + (:generator 20 + (move eax x) + (inst mul eax y) + (move hi edx))) + +#!+multiply-high-vops +(define-vop (mulhi/fx) + (:translate sb!kernel:%multiply-high) + (:policy :fast-safe) + (:args (x :scs (any-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types positive-fixnum unsigned-num) + (:temporary (:sc any-reg :offset eax-offset :from (:argument 0)) eax) + (:temporary (:sc any-reg :offset edx-offset :from (:argument 1) + :to (:result 0) :target hi) edx) + (:results (hi :scs (any-reg))) + (:result-types positive-fixnum) + (:generator 15 + (move eax x) + (inst mul eax y) + (move hi edx) + (inst and hi (lognot fixnum-tag-mask)))) + (define-vop (bignum-lognot lognot-mod32/word=>unsigned) (:translate sb!bignum:%lognot)) -- 1.7.10.4