From 3a13d7769e03b21e81573e9d8f17c672961ef5e8 Mon Sep 17 00:00:00 2001
From: Christophe Rhodes
Date: Fri, 2 Jan 2004 09:37:32 +0000
Subject: [PATCH] 0.8.7.5: Implement modular (unsigned-byte 32) multiplication
 on x86

---
 NEWS                        |    2 ++
 src/compiler/x86/arith.lisp |   47 +++++++++++++++++++++++++++++++++--------------
 version.lisp-expr           |    2 +-
 3 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/NEWS b/NEWS
index 68178d3..b2cc20b 100644
--- a/NEWS
+++ b/NEWS
@@ -2237,6 +2237,8 @@ changes in sbcl-0.8.8 relative to sbcl-0.8.7:
   * bug fix: DECODE-UNIVERSAL-TIME now accepts timezone arguments with
     second-resolution: integer multiples of 1/3600 between -24 and 24.
     (thanks to Vincent Arkesteijn)
+  * optimization: implemented multiplication as a modular
+    (UNSIGNED-BYTE 32) operation on the x86 backend.
 
 planned incompatible changes in 0.8.x:
   * (not done yet, but planned:) When the profiling interface settles
diff --git a/src/compiler/x86/arith.lisp b/src/compiler/x86/arith.lisp
index 33cf26a..5e56a99 100644
--- a/src/compiler/x86/arith.lisp
+++ b/src/compiler/x86/arith.lisp
@@ -1154,6 +1154,11 @@
 (define-vop (fast---mod32-c/unsigned=>unsigned fast---c/unsigned=>unsigned)
   (:translate --mod32))
 
+(define-modular-fun *-mod32 (x y) * 32)
+(define-vop (fast-*-mod32/unsigned=>unsigned fast-*/unsigned=>unsigned)
+  (:translate *-mod32))
+;;; (no -C variant as x86 MUL instruction doesn't take an immediate)
+
 (define-vop (fast-ash-left-mod32-c/unsigned=>unsigned
              fast-ash-c/unsigned=>unsigned)
   (:translate ash-left-mod32))
@@ -1656,25 +1661,39 @@
                   (t (incf count)))))
         (decompose-multiplication arg x n-bits condensed)))
 
+;;; Build the replacement form for multiplying X by the constant Y,
+;;; or give up on the transform when it is not worthwhile here.
+;;; Shared by the * and SB!VM::*-MOD32 transforms, both of which
+;;; pass Y as a constant (UNSIGNED-BYTE 32).
+(defun *-transformer (y)
+  (cond
+    ((= (logcount y) 1)
+     ;; there's a generic transform for y = 2^k (exactly one bit set)
+     (give-up-ir1-transform))
+    ((member y '(3 5 9))
+     ;; we can do these multiplications directly using LEA
+     `(%lea x x ,(1- y) 0))
+    ((member :pentium4 *backend-subfeatures*)
+     ;; the pentium4's multiply unit is reportedly very good
+     (give-up-ir1-transform))
+    ;; FIXME: should make this more fine-grained. If nothing else,
+    ;; there should probably be a cutoff of about 9 instructions on
+    ;; pentium-class machines.
+    (t (optimize-multiply 'x y))))
+
 (deftransform * ((x y)
                  ((unsigned-byte 32) (constant-arg (unsigned-byte 32)))
                  (unsigned-byte 32))
   "recode as leas, shifts and adds"
   (let ((y (lvar-value y)))
-    (cond
-      ((= y (ash 1 (integer-length y)))
-       ;; there's a generic transform for y = 2^k
-       (give-up-ir1-transform))
-      ((member y '(3 5 9))
-       ;; we can do these multiplications directly using LEA
-       `(%lea x x ,(1- y) 0))
-      ((member :pentium4 *backend-subfeatures*)
-       ;; the pentium4's multiply unit is reportedly very good
-       (give-up-ir1-transform))
-      ;; FIXME: should make this more fine-grained. If nothing else,
-      ;; there should probably be a cutoff of about 9 instructions on
-      ;; pentium-class machines.
-      (t (optimize-multiply 'x y)))))
+    (*-transformer y)))
+
+(deftransform sb!vm::*-mod32
+    ((x y) ((unsigned-byte 32) (constant-arg (unsigned-byte 32)))
+     (unsigned-byte 32))
+  "recode as leas, shifts and adds"
+  (let ((y (lvar-value y)))
+    (*-transformer y)))
 
 ;;; FIXME: we should also be able to write an optimizer or two to
 ;;; convert (+ (* x 2) 17), (- (* x 9) 5) to a %LEA.
diff --git a/version.lisp-expr b/version.lisp-expr
index 146721d..1de180f 100644
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.8.7.4"
+"0.8.7.5"
-- 
1.7.10.4