From 51344a3364f2cd6b14985719a77f697c094ea14d Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Tue, 7 Feb 2006 02:35:25 +0000 Subject: [PATCH] 0.9.9.18: Introduce new vm-support-routine COMBINATION-IMPLEMENTATION-STYLE for letting the backend have a crack at implementing certain functions directly (cf. OPTIMIZATIONS, #29); ...implement a few efficient cases for PPC and x86. --- OPTIMIZATIONS | 26 ++++------- src/compiler/alpha/vm.lisp | 4 +- src/compiler/hppa/vm.lisp | 4 +- src/compiler/ir1opt.lisp | 35 +++++++++----- src/compiler/mips/vm.lisp | 4 +- src/compiler/ppc/arith.lisp | 106 +++++++++++++++++++++++++++++++++++++++++++ src/compiler/ppc/vm.lisp | 42 +++++++++++++++++ src/compiler/sparc/vm.lisp | 3 ++ src/compiler/srctran.lisp | 7 ++- src/compiler/x86-64/vm.lisp | 3 ++ src/compiler/x86/arith.lisp | 51 +++++++++++++++++++++ src/compiler/x86/vm.lisp | 31 +++++++++++++ version.lisp-expr | 2 +- 13 files changed, 285 insertions(+), 33 deletions(-) diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS index 0c8e790..3795bf4 100644 --- a/OPTIMIZATIONS +++ b/OPTIMIZATIONS @@ -241,30 +241,22 @@ Python's aggressiveness would make it easier to effect changes such as x86-64: * direct MIN/MAX on {SINGLE,DOUBLE}-FLOATs ({MIN,MAX}S{S,D}) -x86{,-64}: +x86-64: * direct LOGBITP on word-sized integers and fixnums (BT + JC) x86{,-64}/PPC: -* branch-free MIN/MAX on word-sized integers and fixnums -* efficient LOGTESTs on word-sized integers and fixnums (TEST / AND.) +* branch-free MIN/MAX on word-sized integers and fixnums (floats could + be handled too, modulo safety considerations on the PPC) -PPC: -* efficient LDB on word-sized integers and fixnums (RLWINM) +x86-64: +* efficient LOGTESTs on word-sized integers and fixnums (TEST) etc., etc. -The "easier" part claimed above would come about because the functions -would be available for :TRANSLATE through a VOP or similar, whereas with -the current architecture, one would have to pattern-match IR1. While -IR1 pattern-matching would be useful in other contexts, it seems better -here to attempt the direct :TRANSLATE route. - -I (NJF) don't know how to implement such architecture-specific -optimizations whilst keeping the high->low transformations for other -architectures. Certainly adding #!+/- magic in compiler/*.lisp could be -done, but such a solution is somewhat inelegant. Moving the relevant -DEFTRANSFORMs to the architecture-specific compiler/* areas is also -possible, but that would duplicate quite a bit of code. +(The framework for this has been implemented as of 0.9.9.18; see the +vm-support-routine COMBINATION-IMPLEMENTATION-STYLE and its use in +src/compiler/ir1opt.lisp, IR1-OPTIMIZE-COMBINATION. The above +optimizations are left as an exercise for the reader.) -------------------------------------------------------------------------------- #30 (defun foo (x y) diff --git a/src/compiler/alpha/vm.lisp b/src/compiler/alpha/vm.lisp index f03179d..cc4f43e 100644 --- a/src/compiler/alpha/vm.lisp +++ b/src/compiler/alpha/vm.lisp @@ -355,4 +355,6 @@ (constant (format nil "Const~D" offset)) (immediate-constant "Immed")))) - +(!def-vm-support-routine combination-implementation-style (node) + (declare (type sb!c::combination node) (ignore node)) + (values :default nil)) diff --git a/src/compiler/hppa/vm.lisp b/src/compiler/hppa/vm.lisp index a76d03c..6257b18 100644 --- a/src/compiler/hppa/vm.lisp +++ b/src/compiler/hppa/vm.lisp @@ -352,4 +352,6 @@ (constant (format nil "Const~D" offset)) (immediate-constant "Immed")))) - +(!def-vm-support-routine combination-implementation-style (node) + (declare (type sb!c::combination node) (ignore node)) + (values :default nil)) diff --git a/src/compiler/ir1opt.lisp b/src/compiler/ir1opt.lisp index df05a08..b1c09f5 100644 --- a/src/compiler/ir1opt.lisp +++ b/src/compiler/ir1opt.lisp @@ -724,17 +724,30 @@ (let ((fun (fun-info-optimizer info))) (unless (and fun (funcall fun node)) - (dolist (x (fun-info-transforms info)) - #!+sb-show - (when *show-transforms-p* - (let* ((lvar (basic-combination-fun node)) - (fname (lvar-fun-name lvar t))) - (/show "trying transform" x (transform-function x) "for" fname))) - (unless (ir1-transform node x) - #!+sb-show - (when *show-transforms-p* - (/show "quitting because IR1-TRANSFORM result was NIL")) - (return)))))))) + ;; First give the VM a peek at the call + (multiple-value-bind (style transform) + (combination-implementation-style node) + (ecase style + (:direct + ;; The VM knows how to handle this. + ) + (:transform + ;; The VM mostly knows how to handle this. We need + ;; to massage the call slightly, though. + (transform-call node transform (combination-fun-source-name node))) + (:default + ;; Let transforms have a crack at it. + (dolist (x (fun-info-transforms info)) + #!+sb-show + (when *show-transforms-p* + (let* ((lvar (basic-combination-fun node)) + (fname (lvar-fun-name lvar t))) + (/show "trying transform" x (transform-function x) "for" fname))) + (unless (ir1-transform node x) + #!+sb-show + (when *show-transforms-p* + (/show "quitting because IR1-TRANSFORM result was NIL")) + (return))))))))))) (values)) diff --git a/src/compiler/mips/vm.lisp b/src/compiler/mips/vm.lisp index 854fc35..9030f90 100644 --- a/src/compiler/mips/vm.lisp +++ b/src/compiler/mips/vm.lisp @@ -371,4 +371,6 @@ (constant (format nil "Const~D" offset)) (immediate-constant "Immed")))) - +(!def-vm-support-routine combination-implementation-style (node) + (declare (type sb!c::combination node) (ignore node)) + (values :default nil)) diff --git a/src/compiler/ppc/arith.lisp b/src/compiler/ppc/arith.lisp index 5a20c34..aca19f4 100644 --- a/src/compiler/ppc/arith.lisp +++ b/src/compiler/ppc/arith.lisp @@ -661,6 +661,54 @@ (emit-label done)))) +;;;; %LDB + +(defknown %%ldb (integer unsigned-byte unsigned-byte) unsigned-byte + (movable foldable flushable)) + +(define-vop (ldb-c/fixnum) + (:translate %%ldb) + (:args (x :scs (any-reg))) + (:arg-types tagged-num (:constant (integer 1 29)) (:constant (integer 0 29))) + (:info size posn) + (:results (res :scs (any-reg))) + (:result-types tagged-num) + (:policy :fast-safe) + (:generator 2 + (inst rlwinm res x + (mod (- 32 posn) 32) ; effectively rotate right + (- 32 size n-fixnum-tag-bits) + (- 31 n-fixnum-tag-bits)))) + +(define-vop (ldb-c/signed) + (:translate %%ldb) + (:args (x :scs (signed-reg))) + (:arg-types signed-num (:constant (integer 1 29)) (:constant (integer 0 29))) + (:info size posn) + (:results (res :scs (any-reg))) + (:result-types tagged-num) + (:policy :fast-safe) + (:generator 3 + (inst rlwinm res x + (mod (- (+ 32 n-fixnum-tag-bits) posn) 32) + (- 32 size n-fixnum-tag-bits) + (- 31 n-fixnum-tag-bits)))) + +(define-vop (ldb-c/unsigned) + (:translate %%ldb) + (:args (x :scs (unsigned-reg))) + (:arg-types unsigned-num (:constant (integer 1 29)) (:constant (integer 0 29))) + (:info size posn) + (:results (res :scs (any-reg))) + (:result-types tagged-num) + (:policy :fast-safe) + (:generator 3 + (inst rlwinm res x + (mod (- (+ 32 n-fixnum-tag-bits) posn) 32) + (- 32 size n-fixnum-tag-bits) + (- 31 n-fixnum-tag-bits)))) + + ;;;; Modular functions: (define-modular-fun lognot-mod32 (x) lognot :unsigned 32) (define-vop (lognot-mod32/unsigned=>unsigned) @@ -753,6 +801,64 @@ (:arg-types unsigned-num (:constant (unsigned-byte 16))) (:info target not-p y)) +(macrolet ((define-logtest-vops () + `(progn + ,@(loop for suffix in '(/fixnum -c/fixnum + /signed -c/signed + /unsigned -c/unsigned) + for sc in '(any-reg any-reg + signed-reg signed-reg + unsigned-reg unsigned-reg) + for cost in '(4 3 6 5 6 5) + collect + `(define-vop (,(symbolicate "FAST-LOGTEST" suffix) + ,(symbolicate "FAST-CONDITIONAL" suffix)) + (:translate logtest) + (:temporary (:scs (,sc) :to (:result 0)) test) + (:generator ,cost + ;; We could be a lot more sophisticated here and + ;; check for possibilities with ANDIS.. + ,(if (string= "-C" suffix :end2 2) + `(inst andi. test x ,(if (eq suffix '-c/fixnum) + '(fixnumize y) + 'y)) + `(inst and. test x y)) + (inst b? (if not-p :eq :ne) target))))))) + (define-logtest-vops)) + +(defknown %logbitp (integer unsigned-byte) boolean + (movable foldable flushable)) + +;;; We only handle the constant cases because those are the only ones +;;; guaranteed to make it past COMBINATION-IMPLEMENTATION-STYLE. +;;; --njf, 06-02-2006 +(define-vop (fast-logbitp-c/fixnum fast-conditional-c/fixnum) + (:translate %logbitp) + (:temporary (:scs (any-reg) :to (:result 0)) test) + (:generator 4 + (if (< y 14) + (inst andi. test x (ash 1 (+ y n-fixnum-tag-bits))) + (inst andis. test x (ash 1 (- y 14)))) + (inst b? (if not-p :eq :ne) target))) + +(define-vop (fast-logbitp-c/signed fast-conditional-c/signed) + (:translate %logbitp) + (:temporary (:scs (signed-reg) :to (:result 0)) test) + (:generator 4 + (if (< y 16) + (inst andi. test x (ash 1 y)) + (inst andis. test x (ash 1 (- y 16)))) + (inst b? (if not-p :eq :ne) target))) + +(define-vop (fast-logbitp-c/unsigned fast-conditional-c/unsigned) + (:translate %logbitp) + (:temporary (:scs (unsigned-reg) :to (:result 0)) test) + (:generator 4 + (if (< y 16) + (inst andi. test x (ash 1 y)) + (inst andis. test x (ash 1 (- y 16)))) + (inst b? (if not-p :eq :ne) target))) + (define-vop (fast-if-