x86-64:
* direct MIN/MAX on {SINGLE,DOUBLE}-FLOATs ({MIN,MAX}S{S,D})
-x86{,-64}:
+x86-64:
* direct LOGBITP on word-sized integers and fixnums (BT + JC)
x86{,-64}/PPC:
-* branch-free MIN/MAX on word-sized integers and fixnums
-* efficient LOGTESTs on word-sized integers and fixnums (TEST / AND.)
+* branch-free MIN/MAX on word-sized integers and fixnums (floats could
+ be handled too, modulo safety considerations on the PPC)
-PPC:
-* efficient LDB on word-sized integers and fixnums (RLWINM)
+x86-64:
+* efficient LOGTESTs on word-sized integers and fixnums (TEST)
etc., etc.
-The "easier" part claimed above would come about because the functions
-would be available for :TRANSLATE through a VOP or similar, whereas with
-the current architecture, one would have to pattern-match IR1. While
-IR1 pattern-matching would be useful in other contexts, it seems better
-here to attempt the direct :TRANSLATE route.
-
-I (NJF) don't know how to implement such architecture-specific
-optimizations whilst keeping the high->low transformations for other
-architectures. Certainly adding #!+/- magic in compiler/*.lisp could be
-done, but such a solution is somewhat inelegant. Moving the relevant
-DEFTRANSFORMs to the architecture-specific compiler/* areas is also
-possible, but that would duplicate quite a bit of code.
+(The framework for this has been implemented as of 0.9.9.18; see the
+vm-support-routine COMBINATION-IMPLEMENTATION-STYLE and its use in
+src/compiler/ir1opt.lisp, IR1-OPTIMIZE-COMBINATION. The above
+optimizations are left as an exercise for the reader.)
--------------------------------------------------------------------------------
#30
(defun foo (x y)
(constant (format nil "Const~D" offset))
(immediate-constant "Immed"))))
-
+(!def-vm-support-routine combination-implementation-style (node)
+ (declare (type sb!c::combination node) (ignore node))
+ (values :default nil))
(constant (format nil "Const~D" offset))
(immediate-constant "Immed"))))
-
+(!def-vm-support-routine combination-implementation-style (node)
+ (declare (type sb!c::combination node) (ignore node))
+ (values :default nil))
(let ((fun (fun-info-optimizer info)))
(unless (and fun (funcall fun node))
- (dolist (x (fun-info-transforms info))
- #!+sb-show
- (when *show-transforms-p*
- (let* ((lvar (basic-combination-fun node))
- (fname (lvar-fun-name lvar t)))
- (/show "trying transform" x (transform-function x) "for" fname)))
- (unless (ir1-transform node x)
- #!+sb-show
- (when *show-transforms-p*
- (/show "quitting because IR1-TRANSFORM result was NIL"))
- (return))))))))
+ ;; First give the VM a peek at the call
+ (multiple-value-bind (style transform)
+ (combination-implementation-style node)
+ (ecase style
+ (:direct
+ ;; The VM knows how to handle this.
+ )
+ (:transform
+ ;; The VM mostly knows how to handle this. We need
+ ;; to massage the call slightly, though.
+ (transform-call node transform (combination-fun-source-name node)))
+ (:default
+ ;; Let transforms have a crack at it.
+ (dolist (x (fun-info-transforms info))
+ #!+sb-show
+ (when *show-transforms-p*
+ (let* ((lvar (basic-combination-fun node))
+ (fname (lvar-fun-name lvar t)))
+ (/show "trying transform" x (transform-function x) "for" fname)))
+ (unless (ir1-transform node x)
+ #!+sb-show
+ (when *show-transforms-p*
+ (/show "quitting because IR1-TRANSFORM result was NIL"))
+ (return)))))))))))
(values))
(constant (format nil "Const~D" offset))
(immediate-constant "Immed"))))
-
+(!def-vm-support-routine combination-implementation-style (node)
+ (declare (type sb!c::combination node) (ignore node))
+ (values :default nil))
(emit-label done))))
\f
+;;;; %LDB
+
+(defknown %%ldb (integer unsigned-byte unsigned-byte) unsigned-byte
+ (movable foldable flushable))
+
+(define-vop (ldb-c/fixnum)
+ (:translate %%ldb)
+ (:args (x :scs (any-reg)))
+ (:arg-types tagged-num (:constant (integer 1 29)) (:constant (integer 0 29)))
+ (:info size posn)
+ (:results (res :scs (any-reg)))
+ (:result-types tagged-num)
+ (:policy :fast-safe)
+ (:generator 2
+ (inst rlwinm res x
+ (mod (- 32 posn) 32) ; effectively rotate right
+ (- 32 size n-fixnum-tag-bits)
+ (- 31 n-fixnum-tag-bits))))
+
+(define-vop (ldb-c/signed)
+ (:translate %%ldb)
+ (:args (x :scs (signed-reg)))
+ (:arg-types signed-num (:constant (integer 1 29)) (:constant (integer 0 29)))
+ (:info size posn)
+ (:results (res :scs (any-reg)))
+ (:result-types tagged-num)
+ (:policy :fast-safe)
+ (:generator 3
+ (inst rlwinm res x
+ (mod (- (+ 32 n-fixnum-tag-bits) posn) 32)
+ (- 32 size n-fixnum-tag-bits)
+ (- 31 n-fixnum-tag-bits))))
+
+(define-vop (ldb-c/unsigned)
+ (:translate %%ldb)
+ (:args (x :scs (unsigned-reg)))
+ (:arg-types unsigned-num (:constant (integer 1 29)) (:constant (integer 0 29)))
+ (:info size posn)
+ (:results (res :scs (any-reg)))
+ (:result-types tagged-num)
+ (:policy :fast-safe)
+ (:generator 3
+ (inst rlwinm res x
+ (mod (- (+ 32 n-fixnum-tag-bits) posn) 32)
+ (- 32 size n-fixnum-tag-bits)
+ (- 31 n-fixnum-tag-bits))))
+
+\f
;;;; Modular functions:
(define-modular-fun lognot-mod32 (x) lognot :unsigned 32)
(define-vop (lognot-mod32/unsigned=>unsigned)
(:arg-types unsigned-num (:constant (unsigned-byte 16)))
(:info target not-p y))
+(macrolet ((define-logtest-vops ()
+ `(progn
+ ,@(loop for suffix in '(/fixnum -c/fixnum
+ /signed -c/signed
+ /unsigned -c/unsigned)
+ for sc in '(any-reg any-reg
+ signed-reg signed-reg
+ unsigned-reg unsigned-reg)
+ for cost in '(4 3 6 5 6 5)
+ collect
+ `(define-vop (,(symbolicate "FAST-LOGTEST" suffix)
+ ,(symbolicate "FAST-CONDITIONAL" suffix))
+ (:translate logtest)
+ (:temporary (:scs (,sc) :to (:result 0)) test)
+ (:generator ,cost
+ ;; We could be a lot more sophisticated here and
+ ;; check for possibilities with ANDIS..
+ ,(if (string= "-C" suffix :end2 2)
+ `(inst andi. test x ,(if (eq suffix '-c/fixnum)
+ '(fixnumize y)
+ 'y))
+ `(inst and. test x y))
+ (inst b? (if not-p :eq :ne) target)))))))
+ (define-logtest-vops))
+
+(defknown %logbitp (integer unsigned-byte) boolean
+ (movable foldable flushable))
+
+;;; We only handle the constant cases because those are the only ones
+;;; guaranteed to make it past COMBINATION-IMPLEMENTATION-STYLE.
+;;; --njf, 06-02-2006
+(define-vop (fast-logbitp-c/fixnum fast-conditional-c/fixnum)
+ (:translate %logbitp)
+ (:temporary (:scs (any-reg) :to (:result 0)) test)
+ (:generator 4
+ (if (< y 14)
+ (inst andi. test x (ash 1 (+ y n-fixnum-tag-bits)))
+ (inst andis. test x (ash 1 (- y 14))))
+ (inst b? (if not-p :eq :ne) target)))
+
+(define-vop (fast-logbitp-c/signed fast-conditional-c/signed)
+ (:translate %logbitp)
+ (:temporary (:scs (signed-reg) :to (:result 0)) test)
+ (:generator 4
+ (if (< y 16)
+ (inst andi. test x (ash 1 y))
+ (inst andis. test x (ash 1 (- y 16))))
+ (inst b? (if not-p :eq :ne) target)))
+
+(define-vop (fast-logbitp-c/unsigned fast-conditional-c/unsigned)
+ (:translate %logbitp)
+ (:temporary (:scs (unsigned-reg) :to (:result 0)) test)
+ (:generator 4
+ (if (< y 16)
+ (inst andi. test x (ash 1 y))
+ (inst andis. test x (ash 1 (- y 16))))
+ (inst b? (if not-p :eq :ne) target)))
+
(define-vop (fast-if-</fixnum fast-conditional/fixnum)
(:translate <)
(:generator 4
(constant (format nil "Const~D" offset))
(immediate-constant "Immed"))))
+(!def-vm-support-routine combination-implementation-style (node)
+ (declare (type sb!c::combination node))
+ (flet ((valid-funtype (args result)
+ (sb!c::valid-fun-use node
+ (sb!c::specifier-type
+ `(function ,args ,result)))))
+ (case (sb!c::combination-fun-source-name node)
+ (logtest
+ (cond
+ ((or (valid-funtype '(fixnum fixnum) '*)
+ (valid-funtype '((signed-byte 32) (signed-byte 32)) '*)
+ (valid-funtype '((unsigned-byte 32) (unsigned-byte 32)) '*))
+ (values :direct nil))
+ (t (values :default nil))))
+ (logbitp
+ (cond
+ ((or (valid-funtype '((constant-arg (integer 0 29)) fixnum) '*)
+ (valid-funtype '((constant-arg (integer 0 31)) (signed-byte 32)) '*)
+ (valid-funtype '((constant-arg (integer 0 31)) (unsigned-byte 32)) '*))
+ (values :transform '(lambda (index integer)
+ (%logbitp integer index))))
+ (t (values :default nil))))
+ ;; FIXME: can handle MIN and MAX here
+ (sb!kernel:%ldb
+ (cond
+ ((or (valid-funtype '((constant-arg (integer 1 29))
+ (constant-arg (integer 0 29))
+ fixnum)
+ 'fixnum)
+ (valid-funtype '((constant-arg (integer 1 29))
+ (constant-arg (integer 0 29))
+ (signed-byte 32))
+ 'fixnum)
+ (valid-funtype '((constant-arg (integer 1 29))
+ (constant-arg (integer 0 29))
+ (unsigned-byte 32))
+ 'fixnum))
+ (values :transform
+ '(lambda (size posn integer)
+ (%%ldb integer size posn))))
+ (t (values :default nil))))
+ (t (values :default nil)))))
(constant (format nil "Const~D" offset))
(immediate-constant "Immed"))))
+(!def-vm-support-routine combination-implementation-style (node)
+ (declare (type sb!c::combination node) (ignore node))
+ (values :default nil))
#-sb-xc-host ; (See CROSS-FLOAT-INFINITY-KLUDGE.)
(deffrob ceiling))
-(define-source-transform logtest (x y) `(not (zerop (logand ,x ,y))))
+;;; This used to be a source transform (hence the lack of restrictions
+;;; on the argument types), but we make it a regular transform so that
+;;; the VM has a chance to see the bare LOGTEST and potentiall choose
+;;; to implement it differently. --njf, 06-02-2006
+(deftransform logtest ((x y) * *)
+ `(not (zerop (logand x y))))
(deftransform logbitp
((index integer) (unsigned-byte (or (signed-byte #.sb!vm:n-word-bits)
(def!constant cfp-offset rbp-offset) ; pfw - needed by stuff in /code
+(!def-vm-support-routine combination-implementation-style (node)
+ (declare (type sb!c::combination node) (ignore node))
+ (values :default nil))
(:arg-types unsigned-num (:constant (unsigned-byte 32)))
(:info target not-p y))
+(macrolet ((define-logtest-vops ()
+ `(progn
+ ,@(loop for suffix in '(/fixnum -c/fixnum
+ /signed -c/signed
+ /unsigned -c/unsigned)
+ for cost in '(4 3 6 5 6 5)
+ collect
+ `(define-vop (,(symbolicate "FAST-LOGTEST" suffix)
+ ,(symbolicate "FAST-CONDITIONAL" suffix))
+ (:translate logtest)
+ (:generator ,cost
+ (inst test x ,(if (eq suffix '-c/fixnum)
+ '(fixnumize y)
+ 'y))
+ (inst jmp (if not-p :e :ne) target)))))))
+ (define-logtest-vops))
+
+(defknown %logbitp (integer unsigned-byte) boolean
+ (movable foldable flushable))
+
+;;; too much work to do the non-constant case (maybe?)
+(define-vop (fast-logbitp-c/fixnum fast-conditional-c/fixnum)
+ (:translate %logbitp)
+ (:generator 4
+ (aver (< y 29))
+ (inst bt x (+ y n-fixnum-tag-bits))
+ (inst jmp (if not-p :nc :c) target)))
+
+(define-vop (fast-logbitp/signed fast-conditional/signed)
+ (:translate %logbitp)
+ (:generator 6
+ (inst bt x y)
+ (inst jmp (if not-p :nc :c) target)))
+
+(define-vop (fast-logbitp-c/signed fast-conditional-c/signed)
+ (:translate %logbitp)
+ (:generator 5
+ (inst bt x y)
+ (inst jmp (if not-p :nc :c) target)))
+
+(define-vop (fast-logbitp/unsigned fast-conditional/unsigned)
+ (:translate %logbitp)
+ (:generator 6
+ (inst bt x y)
+ (inst jmp (if not-p :nc :c) target)))
+
+(define-vop (fast-logbitp-/unsigned fast-conditional-c/unsigned)
+ (:translate %logbitp)
+ (:generator 5
+ (inst bt x y)
+ (inst jmp (if not-p :nc :c) target)))
(macrolet ((define-conditional-vop (tran cond unsigned not-cond not-unsigned)
`(progn
(immediate-constant "Immed")
(noise (symbol-name (sc-name sc))))))
;;; FIXME: Could this, and everything that uses it, be made #!+SB-SHOW?
+
+(!def-vm-support-routine combination-implementation-style (node)
+ (declare (type sb!c::combination node))
+ (flet ((valid-funtype (args result)
+ (sb!c::valid-fun-use node
+ (sb!c::specifier-type
+ `(function ,args ,result)))))
+ (case (sb!c::combination-fun-source-name node)
+ (logtest
+ (cond
+ ((valid-funtype '(fixnum fixnum) '*)
+ (values :direct nil))
+ ((valid-funtype '((signed-byte 32) (signed-byte 32)) '*)
+ (values :direct nil))
+ ((valid-funtype '((unsigned-byte 32) (unsigned-byte 32)) '*)
+ (values :direct nil))
+ (t (values :default nil))))
+ (logbitp
+ (cond
+ ((and (valid-funtype '((integer 0 29) fixnum) '*)
+ (sb!c::constant-lvar-p (first (sb!c::basic-combination-args node))))
+ (values :transform '(lambda (index integer)
+ (%logbitp integer index))))
+ ((valid-funtype '((integer 0 31) (signed-byte 32)) '*)
+ (values :transform '(lambda (index integer)
+ (%logbitp integer index))))
+ ((valid-funtype '((integer 0 31) (unsigned-byte 32)) '*)
+ (values :transform '(lambda (index integer)
+ (%logbitp integer index))))
+ (t (values :default nil))))
+ (t (values :default nil)))))
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.9.17"
+"0.9.9.18"