From 90c2b0563695904419451b6172efcf9c7008ad8b Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Fri, 8 Jun 2007 08:37:10 +0000 Subject: [PATCH] 1.0.6.33: small CLOS cache improvements * General PCL cleanups: -- Get rid of FUNCTION-FUNCALL and FUNCTION-APPLY: instead just declare the argument type. ETOOMANYLAYERSOFABSTRACTION. -- Implement the unused GET-FUN in terms of GET-FUN1 for clarity. * Use a single bitmask instead of multiply and mask to compute the cache index (like the original implementation). * Slower probe depth limit growth: caches with 1024 lines used to have probe depth 16, which is starting to be on the slow side, and some fairly common generics like PRINT-OBJECT have caches with enough entries that they will be large no matter what. Instead of (ceiling (sqrt lines) 2) make it (ceiling (sqrt (sqrt lines))). * Better CACHE-HAS-INVALID-ENTRIES-P (does less work, picks up incomplete lines.) * MAP-ALL-CACHES and CHECK-CACHE-CONSISTENCY for debugging and analysis. * Typo in the format string in PRINT-OBJECT (CACHE T). * A couple of non-CLOS optimization possibilities recorded. --- OPTIMIZATIONS | 23 +++++++++++ src/pcl/cache.lisp | 96 +++++++++++++++++++++++++++++++++++++-------- src/pcl/print-object.lisp | 2 +- src/runtime/gc-common.c | 3 +- tests/init.test.sh | 4 ++ version.lisp-expr | 2 +- 6 files changed, 110 insertions(+), 20 deletions(-) diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS index 631501c..45b5463 100644 --- a/OPTIMIZATIONS +++ b/OPTIMIZATIONS @@ -416,3 +416,26 @@ altogether. The former option is probably easier than the latter. Dynamic extent allocation doesn't currently work for one-element lists, since there's a source transform from (LIST X) to (CONS X NIL). +-------------------------------------------------------------------------------- +#38 + +(setf (subseq s1 start1 end1) (subseq s2 start2 end1)) + +could be transformed into + +(let ((#:s2 s2) + (#:start2 start2) + (#:end2 end2)) + (replace s1 #:s2 :start1 start1 :end1 end1 :start2 #:start2 :end2 #:end2)) + +when the return value is unused, avoiding the need to cons up the new sequence. + +-------------------------------------------------------------------------------- +#39 + +(let ((*foo* 42)) ...) + +currently compiles to code that ensures the TLS index at runtime, which +is both a decently large chunk of code and unnecessary, as we could ensure +the TLS index at load-time as well. + diff --git a/src/pcl/cache.lisp b/src/pcl/cache.lisp index 7985c5c..bb148ff 100644 --- a/src/pcl/cache.lisp +++ b/src/pcl/cache.lisp @@ -81,13 +81,28 @@ ;; (mod index (length vector)) ;; using a bitmask. (vector #() :type simple-vector) - ;; The bitmask used to calculate (mod index (length vector))l + ;; The bitmask used to calculate (mod (* line-size line-hash) (length vector))). (mask 0 :type fixnum) ;; Current probe-depth needed in the cache. (depth 0 :type index) ;; Maximum allowed probe-depth before the cache needs to expand. (limit 0 :type index)) +(defun compute-cache-mask (vector-length line-size) + ;; Since both vector-length and line-size are powers of two, we + ;; can compute a bitmask such that + ;; + ;; (logand ) + ;; + ;; is "morally equal" to + ;; + ;; (mod (* ) ) + ;; + ;; This is it: (1- vector-length) is #b111... of the approriate size + ;; to get the MOD, and (- line-size) gives right the number of zero + ;; bits at the low end. + (logand (1- vector-length) (- line-size))) + ;;; The smallest power of two that is equal to or greater then X. (declaim (inline power-of-two-ceiling)) (defun power-of-two-ceiling (x) @@ -115,7 +130,7 @@ ;;; policy into account here (speed vs. space.) (declaim (inline compute-limit)) (defun compute-limit (size) - (ceiling (sqrt size) 2)) + (ceiling (sqrt (sqrt size)))) ;;; Returns VALUE if it is not ..EMPTY.., otherwise executes ELSE: (defmacro non-empty-or (value else) @@ -173,10 +188,11 @@ (defun compute-cache-index (cache layouts) (let ((index (hash-layout-or (car layouts) (return-from compute-cache-index nil)))) + (declare (fixnum index)) (dolist (layout (cdr layouts)) (mixf index (hash-layout-or layout (return-from compute-cache-index nil)))) ;; align with cache lines - (logand (* (cache-line-size cache) index) (cache-mask cache)))) + (logand index (cache-mask cache)))) ;;; Emit code that does lookup in cache bound to CACHE-VAR using ;;; layouts bound to LAYOUT-VARS. Go to MISS-TAG on event of a miss or @@ -192,17 +208,16 @@ (with-unique-names (n-index n-vector n-depth n-pointer n-mask MATCH-WRAPPERS EXIT-WITH-HIT) `(let* ((,n-index (hash-layout-or ,(car layout-vars) (go ,miss-tag))) - (,n-vector (cache-vector ,cache-var))) + (,n-vector (cache-vector ,cache-var)) + (,n-mask (cache-mask ,cache-var))) (declare (index ,n-index)) ,@(mapcar (lambda (layout-var) `(mixf ,n-index (hash-layout-or ,layout-var (go ,miss-tag)))) (cdr layout-vars)) ;; align with cache lines - (setf ,n-index (logand (the fixnum (* ,n-index ,line-size)) - (cache-mask ,cache-var))) + (setf ,n-index (logand ,n-index ,n-mask)) (let ((,n-depth (cache-depth ,cache-var)) - (,n-pointer ,n-index) - (,n-mask (cache-mask ,cache-var))) + (,n-pointer ,n-index)) (declare (index ,n-depth ,n-pointer)) (tagbody ,MATCH-WRAPPERS @@ -312,7 +327,7 @@ :line-size line-size :vector (make-array length :initial-element '..empty..) :value value - :mask (1- length) + :mask (compute-cache-mask length line-size) :limit (compute-limit adjusted-size)) ;; Make a smaller one, then (make-cache :key-count key-count :value value :size (ceiling size 2))))) @@ -328,7 +343,7 @@ :again (setf (cache-vector copy) (make-array length :initial-element '..empty..) (cache-depth copy) 0 - (cache-mask copy) (1- length) + (cache-mask copy) (compute-cache-mask length (cache-line-size cache)) (cache-limit copy) (compute-limit (/ length (cache-line-size cache)))) (map-cache (lambda (layouts value) (unless (try-update-cache copy layouts value) @@ -347,11 +362,30 @@ copy)) (defun cache-has-invalid-entries-p (cache) - (and (find-if (lambda (elt) - (and (typep elt 'layout) - (zerop (layout-clos-hash elt)))) - (cache-vector cache)) - t)) + (let ((vector (cache-vector cache)) + (line-size (cache-line-size cache)) + (key-count (cache-key-count cache)) + (mask (cache-mask cache)) + (index 0)) + (loop + ;; Check if the line is in use, and check validity of the keys. + (let ((key1 (svref vector index))) + (when (cache-key-p key1) + (if (zerop (layout-clos-hash key1)) + ;; First key invalid. + (return-from cache-has-invalid-entries-p t) + ;; Line is in use and the first key is valid: check the rest. + (loop for offset from 1 below key-count + do (let ((thing (svref vector (+ index offset)))) + (when (or (not (cache-key-p thing)) + (zerop (layout-clos-hash thing))) + ;; Incomplete line or invalid layout. + (return-from cache-has-invalid-entries-p t))))))) + ;; Line empty of valid, onwards. + (setf index (next-cache-index mask index line-size)) + (when (zerop index) + ;; wrapped around + (return-from cache-has-invalid-entries-p nil))))) (defun hash-table-to-cache (table &key value key-count) (let ((cache (make-cache :key-count key-count :value value @@ -417,8 +451,8 @@ (line-size (cache-line-size cache)) (key-count (cache-key-count cache)) (valuep (cache-value cache)) - (size (/ (length vector) line-size)) (mask (cache-mask cache)) + (size (/ (length vector) line-size)) (index 0) (elt nil) (depth 0)) @@ -453,5 +487,33 @@ :key-count (cache-key-count cache) :line-size line-size :value valuep - :mask (cache-mask cache) + :mask mask :limit (cache-limit cache)))) + +;;;; For debugging & collecting statistics. + +(defun map-all-caches (function) + (dolist (p (list-all-packages)) + (do-symbols (s p) + (when (eq p (symbol-package s)) + (dolist (name (list s + `(setf ,s) + (slot-reader-name s) + (slot-writer-name s) + (slot-boundp-name s))) + (when (fboundp name) + (let ((fun (fdefinition name))) + (when (typep fun 'generic-function) + (let ((cache (gf-dfun-cache fun))) + (when cache + (funcall function name cache))))))))))) + +(defun check-cache-consistency (cache) + (let ((table (make-hash-table :test 'equal))) + (map-cache (lambda (layouts value) + (declare (ignore value)) + (if (gethash layouts table) + (cerror "Check futher." + "Multiple appearances of ~S." layouts) + (setf (gethash layouts table) t))) + cache))) diff --git a/src/pcl/print-object.lisp b/src/pcl/print-object.lisp index b377444..df187c3 100644 --- a/src/pcl/print-object.lisp +++ b/src/pcl/print-object.lisp @@ -131,7 +131,7 @@ (multiple-value-bind (lines-used lines-total max-depth depth-limit) (cache-statistics cache) (format stream - "~D key, ~P~:[no value~;value~], ~D/~D lines, depth ~D/~D" + "~D key~P, ~:[no value~;value~], ~D/~D lines, depth ~D/~D" (cache-key-count cache) (cache-key-count cache) (cache-value cache) diff --git a/src/runtime/gc-common.c b/src/runtime/gc-common.c index 4f95952..02b78db 100644 --- a/src/runtime/gc-common.c +++ b/src/runtime/gc-common.c @@ -358,7 +358,8 @@ scav_code_header(lispobj *where, lispobj object) entry_point != NIL; entry_point = function_ptr->next) { - gc_assert_verbose(is_lisp_pointer(entry_point), "Entry point %lx\n", + gc_assert_verbose(is_lisp_pointer(entry_point), + "Entry point %lx\n is not a lisp pointer.", (long)entry_point); function_ptr = (struct simple_fun *) native_pointer(entry_point); diff --git a/tests/init.test.sh b/tests/init.test.sh index a545d55..8f5f735 100644 --- a/tests/init.test.sh +++ b/tests/init.test.sh @@ -25,6 +25,10 @@ $SBCL <