0.8alpha.0.20:

[sbcl.git] / OPTIMIZATIONS
diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS

index bb50f42..99d40ea 100644 (file)
--- a/OPTIMIZATIONS
+++ b/OPTIMIZATIONS
@@ -1,3 +1,4 @@
+#1
  (defun mysl (s)
      (declare (simple-string s))
      (declare (optimize (speed 3) (safety 0) (debug 0)))
@@ -11,13 +12,14 @@
  * On X86 I is represented as a tagged integer.
  
  * EQL uses "CMP reg,reg" instead of "CMP reg,im". This causes
-  allocation of extra register and extra move.
+  allocation of an extra register and an extra move.
  
  * Unnecessary move:
    3: SLOT S!11[EDX] {SB-C::VECTOR-LENGTH 1 7} => t23[EAX]
    4: MOVE t23[EAX] => t24[EBX]
  
  --------------------------------------------------------------------------------
+#2
  (defun quux (v)
    (declare (optimize (speed 3) (safety 0) (space 2) (debug 0)))
    (declare (type (simple-array double-float 1) v))
@@ -34,6 +36,7 @@
    and emits costly MOVE ... => FR1.
  
  --------------------------------------------------------------------------------
+#3
  (defun bar (n)
    (declare (optimize (speed 3) (safety 0) (space 2))
             (type fixnum n))
@@ -42,10 +45,8 @@
      (length v)))
  
  * IR1 does not optimize away (MAKE-LIST N).
-
-* IR1 thinks that the type of V in (LENGTH V) is (OR LIST SIMPLE-VECTOR), not
-  SIMPLE-VECTOR.
  --------------------------------------------------------------------------------
+#4
  (defun bar (v1 v2)
    (declare (optimize (speed 3) (safety 0) (space 2))
             (type (simple-array base-char 1) v1 v2))
@@ -63,3 +64,97 @@ VOP DATA-VECTOR-SET/SIMPLE-STRING V2!14[EDI] t32[EAX] t30[S2]>t33[CL]
    last two moves.
  
  * And why two moves?
+--------------------------------------------------------------------------------
+#5
+(loop repeat 1.5)
+
+uses generic arithmetic
+--------------------------------------------------------------------------------
+#6
+09:49:05 <jtra> I have found a case in those where suboptimal code is
+  generated with nested loops, it might be moderately easy to fix that
+09:49:28 <jtra> see
+  http://www.bagley.org/~doug/shootout/bench/nestedloop/nestedloop.cmucl
+09:50:30 <jtra> if you add declarations to dotimes, generated code is
+  almost optimal, but most inner loops run out of registers and use
+  memory location for iteration variable
+
+;;; -*- mode: lisp -*-
+;;; $Id$
+;;; http://www.bagley.org/~doug/shootout/
+;;; from Friedrich Dominicus
+
+(defun main ()
+  (let ((n (parse-integer (or (car (last extensions:*command-line-strings*)) "1")))
+        (x 0))
+    (declare (fixnum n)
+             (fixnum x)
+             (optimize (speed 3) (debug 0) (safety 0)))
+    (dotimes (a n)
+      (dotimes (b n)
+        (dotimes (c n)
+          (dotimes (d n)
+            (dotimes (e n)
+              (dotimes (f n)
+                (incf x)))))))
+   (format t "~A~%" x)))
+--------------------------------------------------------------------------------
+#7
+(defun foo (x)
+  (declare (optimize speed (debug 0)))
+  (if (< x 0) x (foo (1- x))))
+
+SBCL generates a full call of FOO (but CMUCL does not).
+--------------------------------------------------------------------------------
+#8
+(defun foo (d)
+  (declare (optimize (speed 3) (safety 0) (debug 0)))
+  (declare (type (double-float 0d0 1d0) d))
+  (loop for i fixnum from 1 to 5
+     for x1 double-float = (sin d) ;;; !!!
+     do (loop for j fixnum from 1 to 4
+             sum x1 double-float)))
+
+Without the marked declaration Python will use a boxed representation for X1.
+
+This is equivalent to
+
+(let ((x nil))
+  (setq x 0d0)
+  ;; use of X as DOUBLE-FLOAT
+)
+
+The initial binding has no effect, and without it X would be of type
+DOUBLE-FLOAT. Unfortunately, IR1 does not optimize away SETs/bindings
+that have no effect, and IR2 does not perform type inference.
+--------------------------------------------------------------------------------
+#9 "Multi-path constant folding"
+(defun foo (x)
+  (if (= (cond ((irgh x) 0)
+               ((buh x) 1)
+               (t 2))
+         0)
+      :yes
+      :no))
+
+This code could be optimized to
+
+(defun foo (x)
+  (cond ((irgh x) :yes)
+        ((buh x) :no)
+        (t :no)))
+--------------------------------------------------------------------------------
+#11
+(inverted variant of #9)
+
+(lambda (x)
+  (let ((y (sap-alien x c-string)))
+    (list (alien-sap y)
+          (alien-sap y))))
+
+It could be optimized to
+
+(lambda (x) (list x x))
+
+(if Y were used only once, the current compiler would optimize it)
+--------------------------------------------------------------------------------