1 ;;;; a bunch of handy macros for the x86
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
14 ;;; We can load/store into fp registers through the top of stack
15 ;;; %st(0) (fr0 here). Loads imply a push to an empty register which
16 ;;; then changes all the reg numbers. These macros help manage that.
18 ;;; Use this when we don't have to load anything. It preserves old tos
19 ;;; value, but probably destroys tn with operation.
20 (defmacro with-tn@fp-top((tn) &body body)
22 (unless (zerop (tn-offset ,tn))
25 (unless (zerop (tn-offset ,tn))
28 ;;; Use this to prepare for load of new value from memory. This
29 ;;; changes the register numbering so the next instruction had better
30 ;;; be a FP load from memory; a register load from another register
31 ;;; will probably be loading the wrong register!
32 (defmacro with-empty-tn@fp-top((tn) &body body)
36 (unless (zerop (tn-offset ,tn))
37 (inst fxch ,tn)))) ; save into new dest and restore st(0)
39 ;;;; instruction-like macros
41 (defmacro move (dst src)
43 "Move SRC into DST unless they are location=."
44 (once-only ((n-dst dst)
46 `(unless (location= ,n-dst ,n-src)
47 (inst mov ,n-dst ,n-src))))
49 (defmacro align-stack-pointer (tn)
50 #!-darwin (declare (ignore tn))
53 `(inst and ,tn #xfffffff0))
55 (defmacro make-ea-for-object-slot (ptr slot lowtag &optional (size :dword))
56 `(make-ea ,size :base ,ptr :disp (- (* ,slot n-word-bytes) ,lowtag)))
58 (defmacro loadw (value ptr &optional (slot 0) (lowtag 0))
59 `(inst mov ,value (make-ea-for-object-slot ,ptr ,slot ,lowtag)))
61 (defmacro storew (value ptr &optional (slot 0) (lowtag 0))
62 (once-only ((value value))
63 `(inst mov (make-ea-for-object-slot ,ptr ,slot ,lowtag) ,value)))
65 ;;; A handy macro for storing widetags.
66 (defmacro storeb (value ptr &optional (slot 0) (lowtag 0))
67 (once-only ((value value))
68 `(inst mov (make-ea-for-object-slot ,ptr ,slot ,lowtag :byte) ,value)))
70 (defmacro pushw (ptr &optional (slot 0) (lowtag 0))
71 `(inst push (make-ea-for-object-slot ,ptr ,slot ,lowtag)))
73 (defmacro popw (ptr &optional (slot 0) (lowtag 0))
74 `(inst pop (make-ea-for-object-slot ,ptr ,slot ,lowtag)))
76 (defmacro make-ea-for-vector-data (object &key (size :dword) (offset 0)
77 index (scale (ash (width-bits size) -3)))
78 `(make-ea ,size :base ,object :index ,index :scale ,scale
79 :disp (- (+ (* vector-data-offset n-word-bytes)
81 other-pointer-lowtag)))
83 ;;;; macros to generate useful values
85 (defmacro load-symbol (reg symbol)
86 `(inst mov ,reg (+ nil-value (static-symbol-offset ,symbol))))
88 (defmacro make-ea-for-symbol-value (symbol &optional (width :dword))
89 (declare (type symbol symbol))
92 (static-symbol-offset ',symbol)
93 (ash symbol-value-slot word-shift)
94 (- other-pointer-lowtag))))
96 (defmacro load-symbol-value (reg symbol)
97 `(inst mov ,reg (make-ea-for-symbol-value ,symbol)))
99 (defmacro store-symbol-value (reg symbol)
100 `(inst mov (make-ea-for-symbol-value ,symbol) ,reg))
103 (defmacro make-ea-for-symbol-tls-index (symbol)
104 (declare (type symbol symbol))
107 (static-symbol-offset ',symbol)
108 (ash symbol-tls-index-slot word-shift)
109 (- other-pointer-lowtag))))
112 (defmacro load-tl-symbol-value (reg symbol)
113 `(with-tls-ea (EA :base ,reg
115 :disp (make-ea-for-symbol-tls-index ,symbol))
116 (inst mov ,reg (make-ea :dword :base ,reg) :maybe-fs)))
118 (defmacro load-tl-symbol-value (reg symbol) `(load-symbol-value ,reg ,symbol))
121 (defmacro store-tl-symbol-value (reg symbol temp)
122 `(with-tls-ea (EA :base ,temp
124 :disp (make-ea-for-symbol-tls-index ,symbol))
125 (inst mov EA ,reg :maybe-fs)))
127 (defmacro store-tl-symbol-value (reg symbol temp)
128 (declare (ignore temp))
129 `(store-symbol-value ,reg ,symbol))
131 (defmacro load-binding-stack-pointer (reg)
133 `(with-tls-ea (EA :base ,reg
135 :disp (* 4 thread-binding-stack-pointer-slot))
136 (inst mov ,reg EA :maybe-fs))
138 `(load-symbol-value ,reg *binding-stack-pointer*))
140 (defmacro store-binding-stack-pointer (reg)
147 (with-tls-ea (EA :base eax-tn
149 :disp (* 4 thread-binding-stack-pointer-slot))
153 (with-tls-ea (EA :disp-type :constant
154 :disp (* 4 thread-binding-stack-pointer-slot))
155 (inst mov EA ,reg :maybe-fs)))
157 `(store-symbol-value ,reg *binding-stack-pointer*))
159 (defmacro load-type (target source &optional (offset 0))
161 "Loads the type bits of a pointer into target independent of
162 byte-ordering issues."
163 (once-only ((n-target target)
166 (ecase *backend-byte-order*
169 (make-ea :byte :base ,n-source :disp ,n-offset)))
172 (make-ea :byte :base ,n-source
173 :disp (+ ,n-offset (1- n-word-bytes))))))))
175 ;;;; allocation helpers
177 ;;; Allocation within alloc_region (which is thread local) can be done
178 ;;; inline. If the alloc_region is overflown allocation is done by
179 ;;; calling the C alloc() function.
181 ;;; C calls for allocation don't /seem/ to make an awful lot of
182 ;;; difference to speed. On pure consing it's about a 25%
183 ;;; gain. Guessing from historical context, it looks like inline
184 ;;; allocation was introduced before pseudo-atomic, at which time all
185 ;;; calls to alloc() would have needed a syscall to mask signals for
186 ;;; the duration. Now we have pseudoatomic there's no need for that
189 (defun allocation-dynamic-extent (alloc-tn size lowtag)
190 (inst sub esp-tn size)
191 ;; FIXME: SIZE _should_ be double-word aligned (suggested but
192 ;; unfortunately not enforced by PAD-DATA-BLOCK and
193 ;; WITH-FIXED-ALLOCATION), so that ESP is always divisible by 8 (for
194 ;; 32-bit lispobjs). In that case, this AND instruction is
195 ;; unneccessary and could be removed. If not, explain why. -- CSR,
197 (inst and esp-tn (lognot lowtag-mask))
198 (aver (not (location= alloc-tn esp-tn)))
199 (inst lea alloc-tn (make-ea :byte :base esp-tn :disp lowtag))
202 (defun allocation-notinline (alloc-tn size)
203 (let* ((alloc-tn-offset (tn-offset alloc-tn))
204 ;; C call to allocate via dispatch routines. Each
205 ;; destination has a special entry point. The size may be a
206 ;; register or a constant.
207 (tn-text (ecase alloc-tn-offset
213 (#.edi-offset "edi")))
214 (size-text (case size (8 "8_") (16 "16_") (t ""))))
215 (unless (or (eql size 8) (eql size 16))
216 (unless (and (tn-p size) (location= alloc-tn size))
217 (inst mov alloc-tn size)))
218 (inst call (make-fixup (concatenate 'string
223 (defun allocation-inline (alloc-tn size)
224 (let ((ok (gen-label))
227 (make-ea :dword :disp
228 #!+sb-thread (* n-word-bytes thread-alloc-region-slot)
229 #!-sb-thread (make-fixup "boxed_region" :foreign)
230 :scale 1)) ; thread->alloc_region.free_pointer
232 (make-ea :dword :disp
233 #!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot))
234 #!-sb-thread (make-fixup "boxed_region" :foreign 4)
235 :scale 1))) ; thread->alloc_region.end_addr
236 (unless (and (tn-p size) (location= alloc-tn size))
237 (inst mov alloc-tn size))
238 (inst add alloc-tn free-pointer #!+sb-thread :fs)
239 (inst cmp alloc-tn end-addr #!+sb-thread :fs)
241 (let ((dst (ecase (tn-offset alloc-tn)
242 (#.eax-offset "alloc_overflow_eax")
243 (#.ecx-offset "alloc_overflow_ecx")
244 (#.edx-offset "alloc_overflow_edx")
245 (#.ebx-offset "alloc_overflow_ebx")
246 (#.esi-offset "alloc_overflow_esi")
247 (#.edi-offset "alloc_overflow_edi"))))
248 (inst call (make-fixup dst :foreign)))
249 (inst jmp-short done)
251 ;; Swap ALLOC-TN and FREE-POINTER
252 (cond ((and (tn-p size) (location= alloc-tn size))
253 ;; XCHG is extremely slow, use the xor swap trick
254 (inst xor alloc-tn free-pointer #!+sb-thread :fs)
255 (inst xor free-pointer alloc-tn #!+sb-thread :fs)
256 (inst xor alloc-tn free-pointer #!+sb-thread :fs))
258 ;; It's easier if SIZE is still available.
259 (inst mov free-pointer alloc-tn #!+sb-thread :fs)
260 (inst sub alloc-tn size)))
265 ;;; Emit code to allocate an object with a size in bytes given by
266 ;;; SIZE. The size may be an integer or a TN. If Inline is a VOP
267 ;;; node-var then it is used to make an appropriate speed vs size
270 ;;; Allocation should only be used inside a pseudo-atomic section, which
271 ;;; should also cover subsequent initialization of the object.
273 ;;; (FIXME: so why aren't we asserting this?)
275 (defun allocation (alloc-tn size &optional inline dynamic-extent lowtag)
278 (allocation-dynamic-extent alloc-tn size lowtag))
279 ((or (null inline) (policy inline (>= speed space)))
280 (allocation-inline alloc-tn size))
282 (allocation-notinline alloc-tn size)))
283 (when (and lowtag (not dynamic-extent))
284 (inst lea alloc-tn (make-ea :byte :base alloc-tn :disp lowtag)))
287 ;;; Allocate an other-pointer object of fixed SIZE with a single word
288 ;;; header having the specified WIDETAG value. The result is placed in
290 (defmacro with-fixed-allocation ((result-tn widetag size &optional inline stack-allocate-p)
293 (bug "empty &body in WITH-FIXED-ALLOCATION"))
294 (once-only ((result-tn result-tn) (size size) (stack-allocate-p stack-allocate-p))
295 `(maybe-pseudo-atomic ,stack-allocate-p
296 (allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p
297 other-pointer-lowtag)
298 (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag)
299 ,result-tn 0 other-pointer-lowtag)
303 (defun emit-error-break (vop kind code values)
306 (inst int 3) ; i386 breakpoint instruction
308 ;; On Darwin, we need to use #x0b0f instead of int3 in order
309 ;; to generate a SIGILL instead of a SIGTRAP as darwin/x86
310 ;; doesn't seem to be reliably firing SIGTRAP
311 ;; handlers. Hopefully this will be fixed by Apple at a
315 ;; The return PC points here; note the location for the debugger.
317 (note-this-location vop :internal-error))
318 (inst byte kind) ; e.g. trap_xyyy
319 (with-adjustable-vector (vector) ; interr arguments
320 (write-var-integer code vector)
322 ;; classic CMU CL comment:
323 ;; zzzzz jrd here. tn-offset is zero for constant
325 (write-var-integer (make-sc-offset (sc-number (tn-sc tn))
326 (or (tn-offset tn) 0))
328 (inst byte (length vector))
329 (dotimes (i (length vector))
330 (inst byte (aref vector i))))))
332 (defun error-call (vop error-code &rest values)
334 "Cause an error. ERROR-CODE is the error to cause."
335 (emit-error-break vop error-trap (error-number-or-lose error-code) values))
337 (defun generate-error-code (vop error-code &rest values)
339 "Generate-Error-Code Error-code Value*
340 Emit code for an error with the specified Error-Code and context Values."
341 (assemble (*elsewhere*)
342 (let ((start-lab (gen-label)))
343 (emit-label start-lab)
344 (emit-error-break vop error-trap (error-number-or-lose error-code) values)
350 ;;; This is used to wrap operations which leave untagged memory lying
351 ;;; around. It's an operation which the AOP weenies would describe as
352 ;;; having "cross-cutting concerns", meaning it appears all over the
353 ;;; place and there's no logical single place to attach documentation.
354 ;;; grep (mostly in src/runtime) is your friend
356 ;;; KLUDGE: since the stack on the x86 is treated conservatively, it
357 ;;; does not matter whether a signal occurs during construction of a
358 ;;; dynamic-extent object, as the half-finished construction of the
359 ;;; object will not cause any difficulty. We can therefore elide
360 (defmacro maybe-pseudo-atomic (not-really-p &body forms)
363 (pseudo-atomic ,@forms)))
365 ;;; Unsafely clear pa flags so that the image can properly lose in a
368 (defmacro %clear-pseudo-atomic ()
369 '(inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot)) 0 :fs))
372 (defun emit-safepoint ()
373 (inst test al-tn (make-ea :byte
374 :disp (make-fixup "gc_safepoint_page" :foreign))))
377 (defmacro pseudo-atomic (&rest forms)
378 (with-unique-names (label)
379 `(let ((,label (gen-label)))
380 (inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot))
383 (inst xor (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot))
386 ;; if PAI was set, interrupts were disabled at the same time
387 ;; using the process signal mask.
388 (inst break pending-interrupt-trap)
391 ;; In this case, when allocation thinks a GC should be done, it
392 ;; does not mark PA as interrupted, but schedules a safepoint
393 ;; trap instead. Let's take the opportunity to trigger that
394 ;; safepoint right now.
398 (defmacro pseudo-atomic (&rest forms)
399 (with-unique-names (label)
400 `(let ((,label (gen-label)))
401 (inst mov (make-ea-for-symbol-value *pseudo-atomic-bits* :dword)
404 (inst xor (make-ea-for-symbol-value *pseudo-atomic-bits* :dword)
407 ;; if PAI was set, interrupts were disabled at the same time
408 ;; using the process signal mask.
409 (inst break pending-interrupt-trap)
410 (emit-label ,label))))
412 ;;;; indexed references
414 (defmacro define-full-compare-and-swap
415 (name type offset lowtag scs el-type &optional translate)
418 ,@(when translate `((:translate ,translate)))
420 (:args (object :scs (descriptor-reg) :to :eval)
421 (index :scs (any-reg immediate unsigned-reg) :to :result)
422 (old-value :scs ,scs :target eax)
423 (new-value :scs ,scs))
424 (:arg-types ,type tagged-num ,el-type ,el-type)
425 (:temporary (:sc descriptor-reg :offset eax-offset
426 :from (:argument 2) :to :result :target value) eax)
427 (:results (value :scs ,scs))
428 (:result-types ,el-type)
431 (let ((ea (sc-case index
433 (make-ea :dword :base object
434 :disp (- (* (+ ,offset (tn-value index))
438 (make-ea :dword :base object :index index :scale 4
439 :disp (- (* ,offset n-word-bytes)
442 (make-ea :dword :base object :index index
443 :disp (- (* ,offset n-word-bytes)
445 (inst cmpxchg ea new-value :lock))
448 (defmacro define-full-reffer (name type offset lowtag scs el-type &optional translate)
452 `((:translate ,translate)))
454 (:args (object :scs (descriptor-reg))
455 (index :scs (any-reg immediate unsigned-reg)))
456 (:arg-types ,type tagged-num)
457 (:results (value :scs ,scs))
458 (:result-types ,el-type)
459 (:generator 3 ; pw was 5
462 (inst mov value (make-ea :dword :base object
463 :disp (- (* (+ ,offset (tn-value index))
467 (inst mov value (make-ea :dword :base object :index index :scale 4
468 :disp (- (* ,offset n-word-bytes)
471 (inst mov value (make-ea :dword :base object :index index
472 :disp (- (* ,offset n-word-bytes)
475 (defmacro define-full-reffer+offset (name type offset lowtag scs el-type &optional translate)
479 `((:translate ,translate)))
481 (:args (object :scs (descriptor-reg))
482 (index :scs (any-reg immediate unsigned-reg)))
483 (:arg-types ,type tagged-num
484 (:constant (constant-displacement ,lowtag sb!vm:n-word-bytes ,offset)))
486 (:results (value :scs ,scs))
487 (:result-types ,el-type)
488 (:generator 3 ; pw was 5
491 (inst mov value (make-ea :dword :base object
492 :disp (- (* (+ ,offset
498 (inst mov value (make-ea :dword :base object :index index :scale 4
499 :disp (- (* (+ ,offset offset)
503 (inst mov value (make-ea :dword :base object :index index
504 :disp (- (* (+ ,offset offset)
508 (defmacro define-full-setter (name type offset lowtag scs el-type &optional translate)
512 `((:translate ,translate)))
514 (:args (object :scs (descriptor-reg))
515 (index :scs (any-reg immediate))
516 (value :scs ,scs :target result))
517 (:arg-types ,type tagged-num ,el-type)
518 (:results (result :scs ,scs))
519 (:result-types ,el-type)
520 (:generator 4 ; was 5
523 (inst mov (make-ea :dword :base object
524 :disp (- (* (+ ,offset (tn-value index))
529 (inst mov (make-ea :dword :base object :index index
530 :disp (- (* ,offset n-word-bytes) ,lowtag))
532 (move result value)))))
534 (defmacro define-full-setter+offset (name type offset lowtag scs el-type &optional translate)
538 `((:translate ,translate)))
540 (:args (object :scs (descriptor-reg))
541 (index :scs (any-reg immediate))
542 (value :scs ,scs :target result))
544 (:arg-types ,type tagged-num
545 (:constant (constant-displacement ,lowtag sb!vm:n-word-bytes ,offset)) ,el-type)
546 (:results (result :scs ,scs))
547 (:result-types ,el-type)
548 (:generator 4 ; was 5
551 (inst mov (make-ea :dword :base object
552 :disp (- (* (+ ,offset (tn-value index) offset)
557 (inst mov (make-ea :dword :base object :index index
558 :disp (- (* (+ ,offset offset)
559 n-word-bytes) ,lowtag))
561 (move result value)))))
563 ;;; helper for alien stuff.
565 (def!macro with-pinned-objects ((&rest objects) &body body)
566 "Arrange with the garbage collector that the pages occupied by
567 OBJECTS will not be moved in memory for the duration of BODY.
568 Useful for e.g. foreign calls where another thread may trigger
571 (let ((pins (make-gensym-list (length objects)))
572 (wpo (gensym "WITH-PINNED-OBJECTS-THUNK")))
573 ;; BODY is stuffed in a function to preserve the lexical
575 `(flet ((,wpo () (progn ,@body)))
576 (declare (muffle-conditions compiler-note))
577 ;; PINS are dx-allocated in case the compiler for some
578 ;; unfathomable reason decides to allocate value-cells
579 ;; for them -- since we have DX value-cells on x86oid
580 ;; platforms this still forces them on the stack.
581 (dx-let ,(mapcar #'list pins objects)
582 (multiple-value-prog1 (,wpo)
583 ;; TOUCH-OBJECT has a VOP with an empty body: compiler
584 ;; thinks we're using the argument and doesn't flush
585 ;; the variable, but we don't have to pay any extra
586 ;; beyond that -- and MULTIPLE-VALUE-PROG1 keeps them
587 ;; live till the body has finished. *whew*
588 ,@(mapcar (lambda (pin)
589 `(touch-object ,pin))
593 ;;; Helper to hide the fact that thread access on Windows needs one more
594 ;;; instruction, needs the FS prefix in that instruction _instead_ of
595 ;;; the actual load/store, and partially hide the resulting need for a
596 ;;; temporary TN when the non-windows might have have dereferenced an EA
597 ;;; without a TN as a base.
599 (defmacro with-tls-ea ((ea-var &key base
601 (disp-type :constant)
604 "Execute BODY with various magic. BODY is expected to emit instructions.
606 In the body, EA-VAR will be an alias for an EA which BODY can use to
607 perform a thread-local load or store.
609 Within the body, :MAYBE-FS will be replaced with :FS or NIL,
610 depending on the target, and needs to be included in any instruction
611 performing an access through the EA.
613 DISP-TYPE must be :INDEX, or :CONSTANT, and DISP must be an EA/TN,
614 or an expression returning an integer, respectively.
616 BASE must be a temporary TN, except in the following situation: BASE
617 will be unused when DISP-TYPE is constant, BASE-ALREADY-LIVE-P is
618 true, _and_ we're on POSIX. This is an intentional optimization, and
619 the caller needs to take care to ignore the TN in this case, or can
622 BASE-ALREADY-LIVE-P means that at run-time, the BASE register already
623 holds an offset that we should add to instead of overwriting it.
624 The value of the BASE register is undefined following the macro invocation."
625 (check-type base-already-live-p boolean)
626 (check-type disp-type (member :index :constant))
627 (let ((body (subst :fs :maybe-fs body)))
631 ,@(subst (if base-already-live-p
633 `(make-ea :dword :base ,base :disp ,disp)
634 ;; BASE not live and not needed, just use DISP
635 `(make-ea :dword :disp ,disp))
639 ;; need to use BASE in any case; and DISP is an EA
641 (inst ,(if base-already-live-p 'add 'mov) ,base ,disp)
642 ,@(subst `(make-ea :dword :base ,base)