1 ;;;; a bunch of handy macros for the x86
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
14 ;;; We can load/store into fp registers through the top of stack
15 ;;; %st(0) (fr0 here). Loads imply a push to an empty register which
16 ;;; then changes all the reg numbers. These macros help manage that.
18 ;;; Use this when we don't have to load anything. It preserves old tos
19 ;;; value, but probably destroys tn with operation.
20 (defmacro with-tn@fp-top((tn) &body body)
22 (unless (zerop (tn-offset ,tn))
25 (unless (zerop (tn-offset ,tn))
28 ;;; Use this to prepare for load of new value from memory. This
29 ;;; changes the register numbering so the next instruction had better
30 ;;; be a FP load from memory; a register load from another register
31 ;;; will probably be loading the wrong register!
32 (defmacro with-empty-tn@fp-top((tn) &body body)
36 (unless (zerop (tn-offset ,tn))
37 (inst fxch ,tn)))) ; save into new dest and restore st(0)
39 ;;;; instruction-like macros
;;; MOVE expands into a conditional MOV: the instruction is only emitted
;;; when the two operands do not already name the same location. Both
;;; operand forms are bound through ONCE-ONLY so that each one appears a
;;; single time in the expansion even though it is used by both the
;;; LOCATION= test and the MOV itself.
(defmacro move (dst src)
  "Move SRC into DST unless they are location=."
  (once-only ((n-dst dst)
              (n-src src))
    `(unless (location= ,n-dst ,n-src)
       (inst mov ,n-dst ,n-src))))
;;; Expand into a MAKE-EA form describing a :DWORD operand based at PTR.
;;; The displacement is the byte offset of word SLOT (SLOT times
;;; N-WORD-BYTES) with LOWTAG subtracted.
(defmacro make-ea-for-object-slot (ptr slot lowtag)
  (let ((displacement `(- (* ,slot n-word-bytes) ,lowtag)))
    `(make-ea :dword :base ,ptr :disp ,displacement)))
;;; Emit a MOV that loads VALUE from word SLOT of the object at PTR.
;;; SLOT and LOWTAG both default to 0.
(defmacro loadw (value ptr &optional (slot 0) (lowtag 0))
  (let ((source-ea (list 'make-ea-for-object-slot ptr slot lowtag)))
    `(inst mov ,value ,source-ea)))
;;; Emit a MOV that stores VALUE into word SLOT of the object at PTR.
;;; SLOT and LOWTAG both default to 0. The VALUE form is routed through
;;; ONCE-ONLY, so the expansion refers to it via a single temporary.
(defmacro storew (value ptr &optional (slot 0) (lowtag 0))
  (once-only ((value value))
    (let ((destination-ea `(make-ea-for-object-slot ,ptr ,slot ,lowtag)))
      `(inst mov ,destination-ea ,value))))
;;; Emit a PUSH whose operand is word SLOT of the object at PTR.
;;; SLOT and LOWTAG both default to 0.
(defmacro pushw (ptr &optional (slot 0) (lowtag 0))
  (list 'inst 'push
        (list 'make-ea-for-object-slot ptr slot lowtag)))
;;; Emit a POP whose operand is word SLOT of the object at PTR.
;;; SLOT and LOWTAG both default to 0.
(defmacro popw (ptr &optional (slot 0) (lowtag 0))
  (list 'inst 'pop
        (list 'make-ea-for-object-slot ptr slot lowtag)))
65 ;;;; macros to generate useful values
;;; Emit a MOV that loads REG with NIL-VALUE plus the static-symbol
;;; offset of SYMBOL. Note that the SYMBOL form is inserted into the
;;; expansion unquoted, so it is evaluated where the macro is used.
(defmacro load-symbol (reg symbol)
  (let ((address `(+ nil-value (static-symbol-offset ,symbol))))
    `(inst mov ,reg ,address)))
70 (defmacro load-symbol-value (reg symbol)
74 (static-symbol-offset ',symbol)
75 (ash symbol-value-slot word-shift)
76 (- other-pointer-lowtag)))))
78 (defmacro store-symbol-value (reg symbol)
82 (static-symbol-offset ',symbol)
83 (ash symbol-value-slot word-shift)
84 (- other-pointer-lowtag)))
88 (defmacro load-tl-symbol-value (reg symbol)
93 (static-symbol-offset ',symbol)
94 (ash symbol-tls-index-slot word-shift)
95 (- other-pointer-lowtag))))
96 (inst fs-segment-prefix)
97 (inst mov ,reg (make-ea :dword :scale 1 :index ,reg))))
;;; Variant of LOAD-TL-SYMBOL-VALUE that simply forwards to
;;; LOAD-SYMBOL-VALUE with the same REG and SYMBOL.
;;; NOTE(review): this shares its name with the FS-relative definition
;;; earlier in the file; presumably the two are selected by a thread
;;; feature conditional that is not visible here -- confirm.
(defmacro load-tl-symbol-value (reg symbol)
  (list 'load-symbol-value reg symbol))
102 (defmacro store-tl-symbol-value (reg symbol temp)
107 (static-symbol-offset ',symbol)
108 (ash symbol-tls-index-slot word-shift)
109 (- other-pointer-lowtag))))
110 (inst fs-segment-prefix)
111 (inst mov (make-ea :dword :scale 1 :index ,temp) ,reg)))
;;; Variant of STORE-TL-SYMBOL-VALUE that simply forwards to
;;; STORE-SYMBOL-VALUE. TEMP is accepted so the lambda list matches the
;;; other definition of this macro, but it is ignored here.
;;; NOTE(review): presumably selected by a thread feature conditional
;;; that is not visible here -- confirm.
(defmacro store-tl-symbol-value (reg symbol temp)
  (declare (ignore temp))
  (list 'store-symbol-value reg symbol))
117 (defmacro load-type (target source &optional (offset 0))
119 "Loads the type bits of a pointer into target independent of
120 byte-ordering issues."
121 (once-only ((n-target target)
124 (ecase *backend-byte-order*
127 (make-ea :byte :base ,n-source :disp ,n-offset)))
130 (make-ea :byte :base ,n-source :disp (+ ,n-offset 3)))))))
132 ;;;; allocation helpers
134 ;;; All allocation is done by calls to assembler routines that
135 ;;; eventually invoke the C alloc() function. Once upon a time
136 ;;; (before threads) allocation within an alloc_region could also be
137 ;;; done inline, with the aid of two C symbols storing the current
138 ;;; allocation region boundaries; however, C symbols are global.
140 ;;; C calls for allocation don't /seem/ to make an awful lot of
141 ;;; difference to speed. Guessing from historical context, it looks
142 ;;; like inline allocation was introduced before pseudo-atomic, at
143 ;;; which time all calls to alloc() would have needed a syscall to
144 ;;; mask signals for the duration. Now we have pseudoatomic there's
145 ;;; no need for that overhead. Still, inline alloc would be a neat
146 ;;; addition someday (except see below).
;;; Emit code that reserves SIZE bytes on the stack and leaves the
;;; address of the reserved area in ALLOC-TN: ESP is decremented by
;;; SIZE, masked with the complement of LOWTAG-MASK, and then copied
;;; into ALLOC-TN. ALLOC-TN must not be ESP itself (checked with AVER).
(defun allocation-dynamic-extent (alloc-tn size)
  (inst sub esp-tn size)
  ;; FIXME: SIZE _should_ be double-word aligned (suggested but
  ;; unfortunately not enforced by PAD-DATA-BLOCK and
  ;; WITH-FIXED-ALLOCATION), so that ESP is always divisible by 8 (for
  ;; 32-bit lispobjs). In that case, this AND instruction is
  ;; unnecessary and could be removed. If not, explain why. -- CSR
  (inst and esp-tn #.(ldb (byte 32 0) (lognot lowtag-mask)))
  (aver (not (location= alloc-tn esp-tn)))
  (inst mov alloc-tn esp-tn))
161 (defun allocation-notinline (alloc-tn size)
162 (let* ((alloc-tn-offset (tn-offset alloc-tn))
163 ;; C call to allocate via dispatch routines. Each
164 ;; destination has a special entry point. The size may be a
165 ;; register or a constant.
166 (tn-text (ecase alloc-tn-offset
172 (#.edi-offset "edi")))
173 (size-text (case size (8 "8_") (16 "16_") (t ""))))
174 (unless (or (eql size 8) (eql size 16))
175 (unless (and (tn-p size) (location= alloc-tn size))
176 (inst mov alloc-tn size)))
177 (inst call (make-fixup (concatenate 'string
182 (defun allocation-inline (alloc-tn size)
183 (let ((ok (gen-label))
185 (make-ea :dword :disp
186 #!+sb-thread (* n-word-bytes thread-alloc-region-slot)
187 #!-sb-thread (make-fixup "boxed_region" :foreign)
188 :scale 1)) ; thread->alloc_region.free_pointer
190 (make-ea :dword :disp
191 #!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot))
192 #!-sb-thread (make-fixup "boxed_region" :foreign 4)
193 :scale 1))) ; thread->alloc_region.end_addr
194 (unless (and (tn-p size) (location= alloc-tn size))
195 (inst mov alloc-tn size))
196 #!+sb-thread (inst fs-segment-prefix)
197 (inst add alloc-tn free-pointer)
198 #!+sb-thread (inst fs-segment-prefix)
199 (inst cmp alloc-tn end-addr)
201 (let ((dst (ecase (tn-offset alloc-tn)
202 (#.eax-offset "alloc_overflow_eax")
203 (#.ecx-offset "alloc_overflow_ecx")
204 (#.edx-offset "alloc_overflow_edx")
205 (#.ebx-offset "alloc_overflow_ebx")
206 (#.esi-offset "alloc_overflow_esi")
207 (#.edi-offset "alloc_overflow_edi"))))
208 (inst call (make-fixup dst :foreign)))
210 #!+sb-thread (inst fs-segment-prefix)
211 (inst xchg free-pointer alloc-tn))
;;; Emit code to allocate an object with a size in bytes given by
;;; SIZE. The size may be an integer or a TN. If INLINE is a VOP
;;; node-var then it is used to make an appropriate speed vs size
;;; decision.
220 ;;; Allocation should only be used inside a pseudo-atomic section, which
221 ;;; should also cover subsequent initialization of the object.
223 ;;; (FIXME: so why aren't we asserting this?)
;;; Dispatch to one of the three allocation strategies:
;;;   * DYNAMIC-EXTENT true  -> ALLOCATION-DYNAMIC-EXTENT;
;;;   * :INLINE-ALLOCATION-IS-GOOD is among *BACKEND-SUBFEATURES* and
;;;     either INLINE is NIL or its policy has SPEED >= SPACE
;;;                          -> ALLOCATION-INLINE;
;;;   * otherwise            -> ALLOCATION-NOTINLINE.
;;; ALLOC-TN and SIZE are passed through unchanged to the chosen helper.
(defun allocation (alloc-tn size &optional inline dynamic-extent)
  (cond
    (dynamic-extent (allocation-dynamic-extent alloc-tn size))
    ;; FIXME: for reasons unknown, inline allocation is a speed win on
    ;; non-P4s, and a speed loss on P4s (and probably other such
    ;; high-spec high-cache machines). :INLINE-ALLOCATION-IS-GOOD is
    ;; a bit of a KLUDGE, really. -- CSR, 2004-08-05 (following
    ;; observations made by ASF and Juho Snellman)
    ((and (member :inline-allocation-is-good *backend-subfeatures*)
          (or (null inline) (policy inline (>= speed space))))
     (allocation-inline alloc-tn size))
    (t (allocation-notinline alloc-tn size))))
;;; Allocate an other-pointer object of fixed SIZE with a single word
;;; header having the specified WIDETAG value. The result is placed in
;;; RESULT-TN.
242 (defmacro with-fixed-allocation ((result-tn widetag size &optional inline)
245 (allocation ,result-tn (pad-data-block ,size) ,inline)
246 (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag)
249 (make-ea :byte :base ,result-tn :disp other-pointer-lowtag))
253 (eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute)
254 (defun emit-error-break (vop kind code values)
255 (let ((vector (gensym)))
256 `((inst int 3) ; i386 breakpoint instruction
257 ;; The return PC points here; note the location for the debugger.
260 (note-this-location vop :internal-error)))
261 (inst byte ,kind) ; eg trap_Xyyy
262 (with-adjustable-vector (,vector) ; interr arguments
263 (write-var-integer (error-number-or-lose ',code) ,vector)
264 ,@(mapcar (lambda (tn)
266 ;; classic CMU CL comment:
267 ;; zzzzz jrd here. tn-offset is zero for constant
269 (write-var-integer (make-sc-offset (sc-number
275 (inst byte (length ,vector))
276 (dotimes (i (length ,vector))
277 (inst byte (aref ,vector i))))))))
279 (defmacro error-call (vop error-code &rest values)
281 "Cause an error. ERROR-CODE is the error to cause."
283 (emit-error-break vop error-trap error-code values)))
285 (defmacro generate-error-code (vop error-code &rest values)
287 "Generate-Error-Code Error-code Value*
288 Emit code for an error with the specified Error-Code and context Values."
289 `(assemble (*elsewhere*)
290 (let ((start-lab (gen-label)))
291 (emit-label start-lab)
292 (error-call ,vop ,error-code ,@values)
298 ;;; This is used to wrap operations which leave untagged memory lying
299 ;;; around. It's an operation which the AOP weenies would describe as
300 ;;; having "cross-cutting concerns", meaning it appears all over the
301 ;;; place and there's no logical single place to attach documentation.
302 ;;; grep (mostly in src/runtime) is your friend
304 ;;; FIXME: *PSEUDO-ATOMIC-FOO* could be made into *PSEUDO-ATOMIC-BITS*,
305 ;;; set with a single operation and cleared with SHR *PSEUDO-ATOMIC-BITS*,-2;
306 ;;; the ATOMIC bit is bit 0, the INTERRUPTED bit is bit 1, and you check
307 ;;; the C flag after the shift to see whether you were interrupted.
;;; KLUDGE: since the stack on the x86 is treated conservatively, it
;;; does not matter whether a signal occurs during construction of a
;;; dynamic-extent object, as the half-finished construction of the
;;; object will not cause any difficulty. We can therefore elide
;;; the pseudo-atomic wrapper in that case.
313 (defmacro maybe-pseudo-atomic (really-p &body forms)
316 (pseudo-atomic ,@forms)))
;;; PSEUDO-ATOMIC variant that keeps its two flag bytes at displacements
;;; (* 4 THREAD-PSEUDO-ATOMIC-INTERRUPTED-SLOT) and
;;; (* 4 THREAD-PSEUDO-ATOMIC-ATOMIC-SLOT); every access is preceded by
;;; an FS segment prefix.
;;; NOTE(review): presumably FS addresses the current thread's structure
;;; and this definition is selected by a thread feature conditional that
;;; is not visible here -- confirm.
;;;
;;; The expansion:
;;;   1. clears the "interrupted" byte and sets the "atomic" byte to 1;
;;;   2. runs FORMS;
;;;   3. clears the "atomic" byte;
;;;   4. compares the "interrupted" byte with 0 and, if it is non-zero,
;;;      executes a BREAK with PENDING-INTERRUPT-TRAP.
(defmacro pseudo-atomic (&rest forms)
  (with-unique-names (label)
    `(let ((,label (gen-label)))
       ;; Enter the pseudo-atomic section.
       (inst fs-segment-prefix)
       (inst mov (make-ea :byte
                          :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0)
       (inst fs-segment-prefix)
       (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) 1)
       ;; The protected body. Without this splice the FORMS argument
       ;; would be silently discarded.
       ,@forms
       ;; Leave the pseudo-atomic section.
       (inst fs-segment-prefix)
       (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) 0)
       (inst fs-segment-prefix)
       (inst cmp (make-ea :byte
                          :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0)
       (inst jmp :eq ,label)
       ;; if PAI was set, interrupts were disabled at the same
       ;; time using the process signal mask.
       (inst break pending-interrupt-trap)
       (emit-label ,label))))
340 (defmacro pseudo-atomic (&rest forms)
341 (with-unique-names (label)
342 `(let ((,label (gen-label)))
343 ;; FIXME: The MAKE-EA noise should become a MACROLET macro
344 ;; or something. (perhaps SVLB, for static variable low
346 (inst mov (make-ea :byte :disp (+ nil-value
347 (static-symbol-offset
348 '*pseudo-atomic-interrupted*)
349 (ash symbol-value-slot word-shift)
350 ;; FIXME: Use mask, not minus, to
351 ;; take out type bits.
352 (- other-pointer-lowtag)))
354 (inst mov (make-ea :byte :disp (+ nil-value
355 (static-symbol-offset
356 '*pseudo-atomic-atomic*)
357 (ash symbol-value-slot word-shift)
358 (- other-pointer-lowtag)))
361 (inst mov (make-ea :byte :disp (+ nil-value
362 (static-symbol-offset
363 '*pseudo-atomic-atomic*)
364 (ash symbol-value-slot word-shift)
365 (- other-pointer-lowtag)))
367 ;; KLUDGE: Is there any requirement for interrupts to be
368 ;; handled in order? It seems as though an interrupt coming
369 ;; in at this point will be executed before any pending
370 ;; interrupts. Or do incoming interrupts check to see
371 ;; whether any interrupts are pending? I wish I could find
372 ;; the documentation for pseudo-atomics.. -- WHN 19991130
373 (inst cmp (make-ea :byte
375 (static-symbol-offset
376 '*pseudo-atomic-interrupted*)
377 (ash symbol-value-slot word-shift)
378 (- other-pointer-lowtag)))
380 (inst jmp :eq ,label)
381 ;; if PAI was set, interrupts were disabled at the same
382 ;; time using the process signal mask.
383 (inst break pending-interrupt-trap)
384 (emit-label ,label))))
386 ;;;; indexed references
388 (defmacro define-full-reffer (name type offset lowtag scs el-type &optional translate)
392 `((:translate ,translate)))
394 (:args (object :scs (descriptor-reg))
395 (index :scs (any-reg)))
396 (:arg-types ,type tagged-num)
397 (:results (value :scs ,scs))
398 (:result-types ,el-type)
399 (:generator 3 ; pw was 5
400 (inst mov value (make-ea :dword :base object :index index
401 :disp (- (* ,offset n-word-bytes)
403 (define-vop (,(symbolicate name "-C"))
405 `((:translate ,translate)))
407 (:args (object :scs (descriptor-reg)))
409 (:arg-types ,type (:constant (signed-byte 30)))
410 (:results (value :scs ,scs))
411 (:result-types ,el-type)
412 (:generator 2 ; pw was 5
413 (inst mov value (make-ea :dword :base object
414 :disp (- (* (+ ,offset index) n-word-bytes)
417 (defmacro define-full-setter (name type offset lowtag scs el-type &optional translate)
421 `((:translate ,translate)))
423 (:args (object :scs (descriptor-reg))
424 (index :scs (any-reg))
425 (value :scs ,scs :target result))
426 (:arg-types ,type tagged-num ,el-type)
427 (:results (result :scs ,scs))
428 (:result-types ,el-type)
429 (:generator 4 ; was 5
430 (inst mov (make-ea :dword :base object :index index
431 :disp (- (* ,offset n-word-bytes) ,lowtag))
433 (move result value)))
434 (define-vop (,(symbolicate name "-C"))
436 `((:translate ,translate)))
438 (:args (object :scs (descriptor-reg))
439 (value :scs ,scs :target result))
441 (:arg-types ,type (:constant (signed-byte 30)) ,el-type)
442 (:results (result :scs ,scs))
443 (:result-types ,el-type)
444 (:generator 3 ; was 5
445 (inst mov (make-ea :dword :base object
446 :disp (- (* (+ ,offset index) n-word-bytes)
449 (move result value)))))
451 ;;; helper for alien stuff.
452 (defmacro with-pinned-objects ((&rest objects) &body body)
453 "Arrange with the garbage collector that the pages occupied by
454 OBJECTS will not be moved in memory for the duration of BODY.
455 Useful for e.g. foreign calls where another thread may trigger
457 `(multiple-value-prog1
459 ,@(loop for p in objects
460 collect `(push-word-on-c-stack
461 (int-sap (sb!kernel:get-lisp-obj-address ,p))))
463 ;; If the body returned normally, we should restore the stack pointer
464 ;; for the benefit of any following code in the same function. If
465 ;; there's a non-local exit in the body, sp is garbage anyway and
466 ;; will get set appropriately from {a, the} frame pointer before it's
468 (pop-words-from-c-stack ,(length objects))))