From b916eedb42ae51b5069f8e2b210649b897b2ec24 Mon Sep 17 00:00:00 2001 From: Lutz Euler Date: Fri, 25 May 2012 16:56:49 +0200 Subject: [PATCH] Use multi-byte NOPs for code alignment on x86-64. This is intended to speed up execution of such code sequences. It makes the disassembly output somewhat more readable, too. The multi-byte NOP instructions are chosen according to the recommendations of both AMD and Intel. All existing x86-64 processors should support the "0f 1f" opcode used. This adds the needed infrastructure to the backend-independent compiler parts and uses it from x86-64. Backends not using this functionality are left unchanged. Extend EMIT-ALIGNMENT to allow specifying multi-byte NOPs to be used instead of repetitions of the single-byte NOP and change the call in EMIT-BLOCK-HEADER on x86-64 to trigger this. Extend EMIT-SKIP to call EMIT-LONG-NOP in this case. On x86-64, add EMIT-LONG-NOP as the instruction emitter and extend the disassembler entry for NOP to understand the multi-byte forms, too. Make EMIT-FILLER decide more carefully whether to join fillers that are adjacent in the list of segment annotations: Only join them if they are immediately adjacent in the segment, too. (Otherwise the joined filler would cover the wrong parts of a shortened alignment sequence.) In certain circumstances %EMIT-ALIGNMENT splits an alignment into two parts. This may not be necessary but has not yet been changed, so sometimes one more long NOP than needed is assembled. 
--- NEWS | 6 ++-- package-data-list.lisp-expr | 2 +- src/compiler/assem.lisp | 60 ++++++++++++++++++++++++++++------------ src/compiler/x86-64/call.lisp | 2 +- src/compiler/x86-64/insts.lisp | 28 +++++++++++++++++++ 5 files changed, 77 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index 8833fe0..d9a5eab 100644 --- a/NEWS +++ b/NEWS @@ -15,8 +15,10 @@ changes relative to sbcl-1.0.57: * bug fix: ENSURE-GENERIC-METHOD-COMBINATION accepts method combination objects as its :METHOD-COMBINATION argument, not just lists designating method combinations. (lp#936513) - * bug fix: run-program no longer unconditionally uses /tmp/ for temporary files. - (lp#968837). + * bug fix: run-program no longer unconditionally uses /tmp/ for temporary + files. (lp#968837). + * optimization: On x86-64, code alignment of block headers is done with + multi-byte NOPs now instead of repetitions of the single-byte NOP. changes in sbcl-1.0.57 relative to sbcl-1.0.56: * RANDOM enhancements and bug fixes: diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index cd95792..1cc6e99 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -2640,7 +2640,7 @@ structure representations" "DOUBLE-FLOAT-WIDETAG" "DOUBLE-FLOAT-VALUE-SLOT" "DOUBLE-INT-CARG-REG-SC-NUMBER" "DOUBLE-REG-SC-NUMBER" "DOUBLE-STACK-SC-NUMBER" - "ERROR-TRAP" "EVEN-FIXNUM-LOWTAG" + "EMIT-LONG-NOP" "ERROR-TRAP" "EVEN-FIXNUM-LOWTAG" "EXPORTED-STATIC-SYMBOLS" "FDEFN-FUN-SLOT" "FDEFN-NAME-SLOT" "FDEFN-RAW-ADDR-SLOT" "FDEFN-SIZE" "FDEFN-WIDETAG" "FIXNUMIZE" diff --git a/src/compiler/assem.lisp b/src/compiler/assem.lisp index 88a40d2..401d9a5 100644 --- a/src/compiler/assem.lisp +++ b/src/compiler/assem.lisp @@ -703,14 +703,16 @@ (def!struct (alignment-note (:include annotation) (:conc-name alignment-) (:predicate alignment-p) - (:constructor make-alignment (bits size fill-byte)) + (:constructor make-alignment (bits size pattern)) (:copier nil)) ;; the minimum number of low-order bits that 
must be zero (bits 0 :type alignment) ;; the amount of filler we are assuming this alignment op will take (size 0 :type (integer 0 #.(1- (ash 1 max-alignment)))) - ;; the byte used as filling - (fill-byte 0 :type (or assembly-unit (signed-byte #.assembly-unit-bits)))) + ;; the byte used as filling or :LONG-NOP, indicating to call EMIT-LONG-NOP + ;; to emit a filling pattern + (pattern 0 :type (or possibly-signed-assembly-unit + (member :long-nop)))) ;;; a reference to someplace that needs to be back-patched when ;;; we actually know what label positions, etc. are @@ -765,12 +767,18 @@ (incf (segment-current-posn segment)) (values)) -;;; interface: Output AMOUNT copies of FILL-BYTE to SEGMENT. -(defun emit-skip (segment amount &optional (fill-byte 0)) +;;; interface: Output AMOUNT bytes to SEGMENT, either copies of +;;; PATTERN (if that is an integer), or by calling EMIT-LONG-NOP +;;; (if PATTERN is :LONG-NOP). +(defun emit-skip (segment amount &optional (pattern 0)) (declare (type segment segment) (type index amount)) - (dotimes (i amount) - (emit-byte segment fill-byte)) + (etypecase pattern + (integer + (dotimes (i amount) + (emit-byte segment pattern))) + ((eql :long-nop) + (sb!vm:emit-long-nop segment amount))) (values)) ;;; This is used to handle the common parts of annotation emission. We @@ -851,10 +859,23 @@ ;;; This is used internally whenever a chooser or alignment decides it ;;; doesn't need as much space as it originally thought. +;;; This function used to extend an existing filler instead of creating +;;; a new one when the previous segment annotation was a filler. Now +;;; this is only done if the previous filler is immediately adjacent +;;; to the new one in the segment, too. 
To see why this restriction is +;;; necessary, consider a jump followed by an alignment made of +;;; multi-byte NOPs when both are shrunk: The shortened alignment is +;;; reemitted at its original _start_ position but the joined filler +;;; would extend over this position and instead leave a subsequence of +;;; the segment up to the alignment's original _end_ position visible. (defun emit-filler (segment n-bytes) (declare (type index n-bytes)) (let ((last (segment-last-annotation segment))) - (cond ((and last (filler-p (car last))) + (cond ((and last + (filler-p (car last)) + (= (+ (filler-index (car last)) + (filler-bytes (car last))) + (segment-current-index segment))) (incf (filler-bytes (car last)) n-bytes)) (t (emit-annotation segment (make-filler n-bytes))))) @@ -879,7 +900,7 @@ ;;; see if we can guarantee the alignment restriction by just outputting a ;;; fixed number of bytes. If so, we do so. Otherwise, we create and emit an ;;; alignment note. -(defun %emit-alignment (segment vop bits &optional (fill-byte 0)) +(defun %emit-alignment (segment vop bits &optional (pattern 0)) (when (segment-run-scheduler segment) (schedule-pending-instructions segment)) (let ((hook (segment-inst-hook segment))) @@ -894,12 +915,12 @@ ;; alignment note to cover the rest. (let ((slop (logand offset (1- (ash 1 alignment))))) (unless (zerop slop) - (emit-skip segment (- (ash 1 alignment) slop) fill-byte))) + (emit-skip segment (- (ash 1 alignment) slop) pattern))) (let ((size (logand (1- (ash 1 bits)) (lognot (1- (ash 1 alignment)))))) (aver (> size 0)) - (emit-annotation segment (make-alignment bits size fill-byte)) - (emit-skip segment size fill-byte)) + (emit-annotation segment (make-alignment bits size pattern)) + (emit-skip segment size pattern)) (setf (segment-alignment segment) bits) (setf (segment-sync-posn segment) (segment-current-posn segment))) (t @@ -908,10 +929,10 @@ ;; assuming the last alignment was met. 
(let* ((mask (1- (ash 1 bits))) (new-offset (logand (+ offset mask) (lognot mask)))) - (emit-skip segment (- new-offset offset) fill-byte)) + (emit-skip segment (- new-offset offset) pattern)) ;; But we emit an alignment with size=0 so we can verify ;; that everything works. - (emit-annotation segment (make-alignment bits 0 fill-byte))))) + (emit-annotation segment (make-alignment bits 0 pattern))))) (values)) ;;; This is used to find how ``aligned'' different offsets are. @@ -1000,7 +1021,7 @@ (with-modified-segment-index-and-posn (segment index posn) (setf (segment-last-annotation segment) prev) (%emit-alignment segment nil (alignment-bits note) - (alignment-fill-byte note)) + (alignment-pattern note)) (let* ((new-index (segment-current-index segment)) (size (- new-index index)) (old-size (alignment-size note)) @@ -1049,6 +1070,11 @@ (with-modified-segment-index-and-posn (segment (alignment-index note) posn) + (when (eql (alignment-pattern note) :long-nop) + ;; We need to re-emit the alignment because a shorter + ;; multi-byte NOP pattern is most of the time not a + ;; prefix of a longer one. + (emit-skip segment size (alignment-pattern note))) (emit-filler segment additional-delta) (setf prev (segment-last-annotation segment)) (if prev @@ -1229,10 +1255,10 @@ ;;; Note: The need to capture SYMBOL-MACROLET bindings of ;;; **CURRENT-SEGMENT* and (%%CURRENT-VOP%%) prevents this from being an ;;; ordinary function. -(defmacro emit-alignment (bits &optional (fill-byte 0)) +(defmacro emit-alignment (bits &optional (pattern 0)) #!+sb-doc "Emit an alignment restriction to the current segment." 
- `(%emit-alignment (%%current-segment%%) (%%current-vop%%) ,bits ,fill-byte)) + `(%emit-alignment (%%current-segment%%) (%%current-vop%%) ,bits ,pattern)) (defun label-position (label &optional if-after delta) #!+sb-doc diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp index e8fdb8a..efa3b5d 100644 --- a/src/compiler/x86-64/call.lisp +++ b/src/compiler/x86-64/call.lisp @@ -575,7 +575,7 @@ (emit-label trampoline-label) (popw rbp-tn (frame-word-offset return-pc-save-offset))) (when alignp - (emit-alignment n-lowtag-bits #x90)) + (emit-alignment n-lowtag-bits :long-nop)) (emit-label start-label)) ;;; Non-TR local call for a fixed number of values passed according to diff --git a/src/compiler/x86-64/insts.lisp b/src/compiler/x86-64/insts.lisp index 79e0c61..f7af091 100644 --- a/src/compiler/x86-64/insts.lisp +++ b/src/compiler/x86-64/insts.lisp @@ -2670,9 +2670,37 @@ (define-instruction nop (segment) (:printer byte ((op #b10010000))) + ;; multi-byte NOP + (:printer ext-reg/mem-no-width ((op '(#x1f 0))) '(:name)) (:emitter (emit-byte segment #b10010000))) +;;; Emit a sequence of single- or multi-byte NOPs to fill AMOUNT many +;;; bytes with the smallest possible number of such instructions. +(defun emit-long-nop (segment amount) + (declare (type segment segment) + (type index amount)) + ;; Pack all instructions into one byte vector to save space. 
+ (let* ((bytes #.(coerce #(#x90 + #x66 #x90 + #x0f #x1f #x00 + #x0f #x1f #x40 #x00 + #x0f #x1f #x44 #x00 #x00 + #x66 #x0f #x1f #x44 #x00 #x00 + #x0f #x1f #x80 #x00 #x00 #x00 #x00 + #x0f #x1f #x84 #x00 #x00 #x00 #x00 #x00 + #x66 #x0f #x1f #x84 #x00 #x00 #x00 #x00 #x00) + '(vector (unsigned-byte 8)))) + (max-length (isqrt (* 2 (length bytes))))) + (loop + (let* ((count (min amount max-length)) + (start (ash (* count (1- count)) -1))) + (dotimes (i count) + (emit-byte segment (aref bytes (+ start i))))) + (if (> amount max-length) + (decf amount max-length) + (return))))) + (define-instruction wait (segment) (:printer byte ((op #b10011011))) (:emitter -- 1.7.10.4