From 808d8109a9b4203ca690706c0b2b04ca759293df Mon Sep 17 00:00:00 2001 From: Paul Khuong Date: Wed, 22 May 2013 14:19:47 -0400 Subject: [PATCH] Improved SIMD-PACK manipulation VOPs on x86-64 * Tighten naive (and, in one case, wrong-looking) lifetime specifications to enable more register coalescing. * Exploit the coalescing by replacing explicit move instructions with MOVE, and specify the SC to get MOVAPS for float data. * Microoptimise away some PSRLDQ by 0. --- src/compiler/x86-64/simd-pack.lisp | 64 ++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/src/compiler/x86-64/simd-pack.lisp b/src/compiler/x86-64/simd-pack.lisp index 717265c..cbb48e2 100644 --- a/src/compiler/x86-64/simd-pack.lisp +++ b/src/compiler/x86-64/simd-pack.lisp @@ -159,14 +159,15 @@ (define-vop (%simd-pack-high) (:translate %simd-pack-high) - (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg))) + (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg) + :target tmp)) (:arg-types simd-pack) - (:temporary (:sc sse-reg) tmp) + (:temporary (:sc sse-reg :from (:argument 0)) tmp) (:results (dst :scs (unsigned-reg))) (:result-types unsigned-num) (:policy :fast-safe) (:generator 3 - (inst movdqa tmp x) + (move tmp x) (inst psrldq tmp 8) (inst movd dst tmp))) @@ -208,7 +209,7 @@ (:args (lo :scs (unsigned-reg)) (hi :scs (unsigned-reg))) (:arg-types unsigned-num unsigned-num) - (:temporary (:sc sse-reg) tmp) + (:temporary (:sc int-sse-reg) tmp) (:results (dst :scs (int-sse-reg))) (:result-types simd-pack-int) (:generator 5 @@ -245,15 +246,14 @@ (values (%simd-pack-low pack) (%simd-pack-high pack)))) - (define-vop (%make-simd-pack-double) (:translate %make-simd-pack-double) (:policy :fast-safe) - (:args (lo :scs (double-reg)) - (hi :scs (double-reg))) + (:args (lo :scs (double-reg) :target dst) + (hi :scs (double-reg) :target tmp)) (:arg-types double-float double-float) - (:temporary (:sc double-sse-reg) tmp) - (:results (dst :scs 
(double-sse-reg))) + (:temporary (:sc double-sse-reg :from (:argument 1)) tmp) + (:results (dst :scs (double-sse-reg) :from (:argument 0))) (:result-types simd-pack-double) (:generator 5 (move dst lo) @@ -267,13 +267,13 @@ (define-vop (%make-simd-pack-single) (:translate %make-simd-pack-single) (:policy :fast-safe) - (:args (x :scs (single-reg)) - (y :scs (single-reg)) + (:args (x :scs (single-reg) :target dst) + (y :scs (single-reg) :target tmp) (z :scs (single-reg)) (w :scs (single-reg))) (:arg-types single-float single-float single-float single-float) - (:temporary (:sc sse-reg) tmp) - (:results (dst :scs (single-sse-reg))) + (:temporary (:sc single-sse-reg :from (:argument 1)) tmp) + (:results (dst :scs (single-sse-reg) :from (:argument 0))) (:result-types simd-pack-single) (:generator 5 (move dst x) @@ -290,18 +290,25 @@ (%simd-pack-tag pack)) (define-vop (%simd-pack-single-item) - (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg))) + (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg) + :target tmp)) (:arg-types simd-pack) (:info index) (:results (dst :scs (single-reg))) (:result-types single-float) - (:temporary (:sc sse-reg) tmp) + (:temporary (:sc single-sse-reg :from (:argument 0)) tmp) (:policy :fast-safe) (:generator 3 - (inst movdqa tmp x) - (inst psrldq tmp (* 4 index)) - (inst xorps dst dst) - (inst movss dst tmp))) + (cond ((and (zerop index) + (not (location= x dst))) + (inst xorps dst dst) + (inst movss dst x)) + (t + (move tmp x) + (when (plusp index) + (inst psrldq tmp (* 4 index))) + (inst xorps dst dst) + (inst movss dst tmp))))) #-sb-xc-host (declaim (inline %simd-pack-singles)) @@ -314,18 +321,25 @@ (%primitive %simd-pack-single-item pack 3))) (define-vop (%simd-pack-double-item) - (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg))) + (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg) + :target tmp)) (:info index) (:arg-types simd-pack) (:results (dst :scs (double-reg))) (:result-types double-float) - 
(:temporary (:sc sse-reg) tmp) + (:temporary (:sc double-sse-reg :from (:argument 0)) tmp) (:policy :fast-safe) (:generator 3 - (inst movdqa tmp x) - (inst psrldq tmp (* 8 index)) - (inst xorpd dst dst) - (inst movsd dst tmp))) + (cond ((and (zerop index) + (not (location= x dst))) + (inst xorpd dst dst) + (inst movsd dst x)) + (t + (move tmp x) + (when (plusp index) + (inst psrldq tmp (* 8 index))) + (inst xorpd dst dst) + (inst movsd dst tmp))))) #-sb-xc-host (declaim (inline %simd-pack-doubles)) -- 1.7.10.4