+#1
(defun mysl (s)
(declare (simple-string s))
(declare (optimize (speed 3) (safety 0) (debug 0)))
* On X86 I is represented as a tagged integer.
* EQL uses "CMP reg,reg" instead of "CMP reg,im". This causes
- allocation of extra register and extra move.
+ allocation of an extra register and an extra move.
* Unnecessary move:
3: SLOT S!11[EDX] {SB-C::VECTOR-LENGTH 1 7} => t23[EAX]
4: MOVE t23[EAX] => t24[EBX]
--------------------------------------------------------------------------------
+#2
(defun quux (v)
(declare (optimize (speed 3) (safety 0) (space 2) (debug 0)))
(declare (type (simple-array double-float 1) v))
and emits costly MOVE ... => FR1.
--------------------------------------------------------------------------------
+#3
(defun bar (n)
(declare (optimize (speed 3) (safety 0) (space 2))
(type fixnum n))
(length v)))
* IR1 does not optimize away (MAKE-LIST N).
-
-* IR1 thinks that the type of V in (LENGTH V) is (OR LIST SIMPLE-VECTOR), not
- SIMPLE-VECTOR.
--------------------------------------------------------------------------------
+#4
(defun bar (v1 v2)
(declare (optimize (speed 3) (safety 0) (space 2))
(type (simple-array base-char 1) v1 v2))
last two moves.
* And why two moves?
+--------------------------------------------------------------------------------
+#5
+(loop repeat 1.5)
+
+uses generic arithmetic
+--------------------------------------------------------------------------------
+#6
+09:49:05 <jtra> I have found a case in those where suboptimal code is
+ generated with nested loops, it might be moderately easy to fix that
+09:49:28 <jtra> see
+ http://www.bagley.org/~doug/shootout/bench/nestedloop/nestedloop.cmucl
+09:50:30 <jtra> if you add declarations to dotimes, generated code is
+ almost optimal, but most inner loops run out of registers and use
+ memory location for iteration variable
+
+;;; -*- mode: lisp -*-
+;;; $Id$
+;;; http://www.bagley.org/~doug/shootout/
+;;; from Friedrich Dominicus
+
+(defun main ()
+ (let ((n (parse-integer (or (car (last extensions:*command-line-strings*)) "1")))
+ (x 0))
+ (declare (fixnum n)
+ (fixnum x)
+ (optimize (speed 3) (debug 0) (safety 0)))
+ (dotimes (a n)
+ (dotimes (b n)
+ (dotimes (c n)
+ (dotimes (d n)
+ (dotimes (e n)
+ (dotimes (f n)
+ (incf x)))))))
+ (format t "~A~%" x)))
+--------------------------------------------------------------------------------
+#7
+(defun foo (x)
+ (declare (optimize speed (debug 0)))
+ (if (< x 0) x (foo (1- x))))
+
+SBCL generates a full call of FOO (but CMUCL does not).
+--------------------------------------------------------------------------------
+#8
+(defun foo (d)
+ (declare (optimize (speed 3) (safety 0) (debug 0)))
+ (declare (type (double-float 0d0 1d0) d))
+ (loop for i fixnum from 1 to 5
+ for x1 double-float = (sin d) ;;; !!!
+ do (loop for j fixnum from 1 to 4
+ sum x1 double-float)))
+
+Without the marked declaration, Python will use a boxed representation for X1.
+
+This is equivalent to
+
+(let ((x nil))
+ (setq x 0d0)
+ ;; use of X as DOUBLE-FLOAT
+)
+
+The initial binding has no effect, and without it X would be of type
+DOUBLE-FLOAT. Unfortunately, IR1 does not optimize away such no-effect
+SETs/bindings, and IR2 does not perform type inference.
+--------------------------------------------------------------------------------
+#9 "Multi-path constant folding"
+(defun foo (x)
+ (if (= (cond ((irgh x) 0)
+ ((buh x) 1)
+ (t 2))
+ 0)
+ :yes
+ :no))
+
+This code could be optimized to
+
+(defun foo (x)
+ (cond ((irgh x) :yes)
+ ((buh x) :no)
+ (t :no)))
+--------------------------------------------------------------------------------
+#11
+(inverted variant of #9)
+
+(lambda (x)
+ (let ((y (sap-alien x c-string)))
+ (list (alien-sap y)
+ (alien-sap y))))
+
+It could be optimized to
+
+(lambda (x) (list x x))
+
+(if Y were used only once, the current compiler would optimize it)
+--------------------------------------------------------------------------------