-;;; Two allocation approaches are implemented. A call into C can be
-;;; used, and in that case special care can be taken to disable
-;;; interrupts. Alternatively with gencgc inline allocation is possible
-;;; although it isn't interrupt safe.
-
-;;; For GENCGC it is possible to inline object allocation, to permit
-;;; this set the following variable to True.
-;;;
-;;; FIXME: The comment above says that this isn't interrupt safe. Is that
-;;; right? If so, do we want to do this? And surely we don't want to do this by
-;;; default? How much time does it save to do this? Is it any different in the
-;;; current CMU CL version instead of the one that I grabbed in 1998?
-;;; (Later observation: In order to be interrupt safe, it'd probably
-;;; have to use PSEUDO-ATOMIC, so it's probably not -- yuck. Try benchmarks
-;;; with and without inline allocation, and unless the inline allocation
-;;; wins by a whole lot, it's not likely to be worth messing with. If
-;;; we want to hack up memory allocation for performance, effort spent
-;;; on DYNAMIC-EXTENT would probably give a better payoff.)
-(defvar *maybe-use-inline-allocation* t)
+;;; Allocation within alloc_region (which is thread local) can be done
+;;; inline. If the alloc_region is overflown allocation is done by
+;;; calling the C alloc() function.
+
+;;; C calls for allocation don't /seem/ to make an awful lot of
+;;; difference to speed. On pure consing it's about a 25%
+;;; gain. Guessing from historical context, it looks like inline
+;;; allocation was introduced before pseudo-atomic, at which time all
+;;; calls to alloc() would have needed a syscall to mask signals for
+;;; the duration. Now we have pseudoatomic there's no need for that
+;;; overhead.
+
+(defun allocation-dynamic-extent (alloc-tn size)
+ (inst sub esp-tn size)
+ ;; FIXME: SIZE _should_ be double-word aligned (suggested but
+ ;; unfortunately not enforced by PAD-DATA-BLOCK and
+ ;; WITH-FIXED-ALLOCATION), so that ESP is always divisible by 8 (for
+ ;; 32-bit lispobjs). In that case, this AND instruction is
+ ;; unneccessary and could be removed. If not, explain why. -- CSR,
+ ;; 2004-03-30
+ (inst and esp-tn (lognot lowtag-mask))
+ (aver (not (location= alloc-tn esp-tn)))
+ (inst mov alloc-tn esp-tn)
+ (values))
+
+(defun allocation-notinline (alloc-tn size)
+ (let* ((alloc-tn-offset (tn-offset alloc-tn))
+ ;; C call to allocate via dispatch routines. Each
+ ;; destination has a special entry point. The size may be a
+ ;; register or a constant.
+ (tn-text (ecase alloc-tn-offset
+ (#.eax-offset "eax")
+ (#.ecx-offset "ecx")
+ (#.edx-offset "edx")
+ (#.ebx-offset "ebx")
+ (#.esi-offset "esi")
+ (#.edi-offset "edi")))
+ (size-text (case size (8 "8_") (16 "16_") (t ""))))
+ (unless (or (eql size 8) (eql size 16))
+ (unless (and (tn-p size) (location= alloc-tn size))
+ (inst mov alloc-tn size)))
+ (inst call (make-fixup (concatenate 'string
+ "alloc_" size-text
+ "to_" tn-text)
+ :foreign))))
+
+(defun allocation-inline (alloc-tn size)
+ (let ((ok (gen-label))
+ (done (gen-label))
+ (free-pointer
+ (make-ea :dword :disp
+ #!+sb-thread (* n-word-bytes thread-alloc-region-slot)
+ #!-sb-thread (make-fixup "boxed_region" :foreign)
+ :scale 1)) ; thread->alloc_region.free_pointer
+ (end-addr
+ (make-ea :dword :disp
+ #!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot))
+ #!-sb-thread (make-fixup "boxed_region" :foreign 4)
+ :scale 1))) ; thread->alloc_region.end_addr
+ (unless (and (tn-p size) (location= alloc-tn size))
+ (inst mov alloc-tn size))
+ #!+sb-thread (inst fs-segment-prefix)
+ (inst add alloc-tn free-pointer)
+ #!+sb-thread (inst fs-segment-prefix)
+ (inst cmp alloc-tn end-addr)
+ (inst jmp :be ok)
+ (let ((dst (ecase (tn-offset alloc-tn)
+ (#.eax-offset "alloc_overflow_eax")
+ (#.ecx-offset "alloc_overflow_ecx")
+ (#.edx-offset "alloc_overflow_edx")
+ (#.ebx-offset "alloc_overflow_ebx")
+ (#.esi-offset "alloc_overflow_esi")
+ (#.edi-offset "alloc_overflow_edi"))))
+ (inst call (make-fixup dst :foreign)))
+ (inst jmp-short done)
+ (emit-label ok)
+ ;; Swap ALLOC-TN and FREE-POINTER
+ (cond ((and (tn-p size) (location= alloc-tn size))
+ ;; XCHG is extremely slow, use the xor swap trick
+ #!+sb-thread (inst fs-segment-prefix)
+ (inst xor alloc-tn free-pointer)
+ #!+sb-thread (inst fs-segment-prefix)
+ (inst xor free-pointer alloc-tn)
+ #!+sb-thread (inst fs-segment-prefix)
+ (inst xor alloc-tn free-pointer))
+ (t
+ ;; It's easier if SIZE is still available.
+ #!+sb-thread (inst fs-segment-prefix)
+ (inst mov free-pointer alloc-tn)
+ (inst sub alloc-tn size)))
+ (emit-label done))
+ (values))
+