Introduce out-of-line ALLOCATE-CONS routines on x86.
Saves ~250K+ of core space. The allocation sequences for other
objects could be profitably out-of-lined in the same fashion;
one and two word bignums would be the next candidates.
(with-fixed-allocation (ebx bignum-widetag (+ bignum-digits-offset 1))
(storew eax ebx bignum-digits-offset other-pointer-lowtag))
(inst ret))
+
+#+sb-assembling
+(defun frob-allocation-assembly-routine (obj lowtag arg-tn)
+ `(define-assembly-routine (,(intern (format nil "ALLOCATE-~A-TO-~A" obj arg-tn)))
+ ((:temp ,arg-tn descriptor-reg ,(intern (format nil "~A-OFFSET" arg-tn))))
+ (pseudo-atomic
+ (allocation ,arg-tn (pad-data-block ,(intern (format nil "~A-SIZE" obj))))
+ (inst lea ,arg-tn (make-ea :byte :base ,arg-tn :disp ,lowtag))
+ (inst ret))))
+
+#+sb-assembling
+(macrolet ((frob-cons-routines ()
+ (let ((routines nil))
+ (dolist (tn-offset *dword-regs*
+ `(progn ,@routines))
+ (push (frob-allocation-assembly-routine 'cons
+ list-pointer-lowtag
+ (intern (aref *dword-register-names* tn-offset)))
+ routines)))))
+ (frob-cons-routines))
(define-source-transform ninth (x) `(nth 8 ,x))
(define-source-transform tenth (x) `(nth 9 ,x))
+;;; LIST with one arg is an extremely common operation (at least inside
+;;; SBCL itself); translate it to CONS to take advantage of common
+;;; allocation routines.
+(define-source-transform list (&rest args)
+ (case (length args)
+ (1 `(cons ,(first args) nil))
+ (t (values nil t))))
+
+;;; And similarly for LIST*.
+(define-source-transform list* (&rest args)
+ (case (length args)
+ (2 `(cons ,(first args) ,(second args)))
+ (t (values nil t))))
+
;;; Translate RPLACx to LET and SETF.
(define-source-transform rplaca (x y)
(once-only ((n-x x))
(:results (result :scs (descriptor-reg)))
(:node-var node)
(:generator 50
- (pseudo-atomic
- (allocation result (pad-data-block words) node)
- (inst lea result (make-ea :byte :base result :disp lowtag))
- (when type
- (storew (logior (ash (1- words) n-widetag-bits) type)
- result
- 0
- lowtag)))))
+ ;; We special case the allocation of conses, because they're
+ ;; extremely common and because the pseudo-atomic sequence on x86
+ ;; is relatively heavyweight. However, if the user asks for top
+ ;; speed, we accomodate him. The primary reason that we don't
+ ;; also check for (< SPEED SPACE) is because we want the space
+ ;; savings that these out-of-line allocation routines bring whilst
+ ;; compiling SBCL itself. --njf, 2006-07-08
+ (if (and (= lowtag list-pointer-lowtag) (policy node (< speed 3)))
+ (let ((dst
+ #.(loop for offset in *dword-regs*
+ collect `(,offset
+ ',(intern (format nil "ALLOCATE-CONS-TO-~A"
+ (svref *dword-register-names*
+ offset)))) into cases
+ finally (return `(case (tn-offset result)
+ ,@cases)))))
+ (aver (null type))
+ (inst call (make-fixup dst :assembly-routine)))
+ (pseudo-atomic
+ (allocation result (pad-data-block words) node)
+ (inst lea result (make-ea :byte :base result :disp lowtag))
+ (when type
+ (storew (logior (ash (1- words) n-widetag-bits) type)
+ result
+ 0
+ lowtag))))))
(define-vop (var-alloc)
(:args (extra :scs (any-reg)))
(allocation result bytes node)
(inst lea result (make-ea :byte :base result :disp lowtag))
(storew header result 0 lowtag))))
-
-
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.14.8"
+"0.9.14.9"