OPTIMIZATIONS

   1 #1
   2 (defun mysl (s)
   3     (declare (simple-string s))
   4     (declare (optimize (speed 3) (safety 0) (debug 0)))
   5     (let ((c 0))
   6       (declare (fixnum c))
   7       (dotimes (i (length s))
   8         (when (eql (aref s i) #\1)
   9           (incf c)))
  10       c))
  11
  12 * On X86 I is represented as a tagged integer.
  13
  14 * EQL uses "CMP reg,reg" instead of "CMP reg,im". This causes
  15   allocation of an extra register and an extra move.
  16
  17 * Unnecessary move:
  18   3: SLOT S!11[EDX] {SB-C::VECTOR-LENGTH 1 7} => t23[EAX]
  19   4: MOVE t23[EAX] => t24[EBX]
  20
  21 --------------------------------------------------------------------------------
  22 #2
  23 (defun quux (v)
  24   (declare (optimize (speed 3) (safety 0) (space 2) (debug 0)))
  25   (declare (type (simple-array double-float 1) v))
  26   (let ((s 0d0))
  27     (declare (type double-float s))
  28     (dotimes (i (length v))
  29       (setq s (+ s (aref v i))))
  30     s))
  31
  32 * Python does not combine + with AREF, so generates extra move and
  33   allocates a register.
  34
  35 * On X86 Python thinks that all FP registers are directly accessible
  36   and emits costy MOVE ... => FR1.
  37
  38 --------------------------------------------------------------------------------
  39 #3
  40 (defun bar (n)
  41   (declare (optimize (speed 3) (safety 0) (space 2))
  42            (type fixnum n))
  43   (let ((v (make-list n)))
  44     (setq v (make-array n))
  45     (length v)))
  46
  47 * IR1 does not optimize away (MAKE-LIST N).
  48 --------------------------------------------------------------------------------
  49 #4
  50 (defun bar (v1 v2)
  51   (declare (optimize (speed 3) (safety 0) (space 2))
  52            (type (simple-array base-char 1) v1 v2))
  53   (dotimes (i (length v1))
  54     (setf (aref v2 i) (aref v1 i))))
  55
  56 VOP DATA-VECTOR-SET/SIMPLE-STRING V2!14[EDI] t32[EAX] t30[S2]>t33[CL]
  57                                   => t34[S2]<t35[AL]
  58         MOV     #<TN t33[CL]>, #<TN t30[S2]>
  59         MOV     BYTE PTR [EDI+EAX+1], #<TN t33[CL]>
  60         MOV     #<TN t35[AL]>, #<TN t33[CL]>
  61         MOV     #<TN t34[S2]>, #<TN t35[AL]>
  62
  63 * The value of DATA-VECTOR-SET is not used, so there is no need in the
  64   last two moves.
  65
  66 * And why two moves?
  67 --------------------------------------------------------------------------------
  68 #5
  69 (loop repeat 1.5)
  70
  71 uses generic arithmetic
  72 --------------------------------------------------------------------------------
  73 #6
  74 09:49:05 <jtra> I have found a case in those where suboptimal code is
  75   generate with nested loops, it might be moderately easy to fix that
  76 09:49:28 <jtra> see
  77   http://www.bagley.org/~doug/shootout/bench/nestedloop/nestedloop.cmucl
  78 09:50:30 <jtra> if you add declarations to dotimes, generated code is
  79   almost optimal, but most inner loops run out of registers and use
  80   memory location for iteration variable
  81
  82 ;;; -*- mode: lisp -*-
  83 ;;; $Id$
  84 ;;; http://www.bagley.org/~doug/shootout/
  85 ;;; from Friedrich Dominicus
  86
  87 (defun main ()
  88   (let ((n (parse-integer (or (car (last extensions:*command-line-strings*)) "1")))
  89         (x 0))
  90     (declare (fixnum n)
  91              (fixnum x)
  92              (optimize (speed 3) (debug 0) (safety 0)))
  93     (dotimes (a n)
  94       (dotimes (b n)
  95         (dotimes (c n)
  96           (dotimes (d n)
  97             (dotimes (e n)
  98               (dotimes (f n)
  99                 (incf x)))))))
 100    (format t "~A~%" x)))
 101 --------------------------------------------------------------------------------
 102 #7
 103 (defun foo (x)
 104   (declare (optimize speed (debug 0)))
 105   (if (< x 0) x (foo (1- x))))
 106
 107 SBCL generates a full call of FOO (but CMUCL does not).
 108 --------------------------------------------------------------------------------
 109 #8
 110 (defun foo (d)
 111   (declare (optimize (speed 3) (safety 0) (debug 0)))
 112   (declare (type (double-float 0d0 1d0) d))
 113   (loop for i fixnum from 1 to 5
 114      for x1 double-float = (sin d) ;;; !!!
 115      do (loop for j fixnum from 1 to 4
 116              sum x1 double-float)))
 117
 118 Without the marked declaration Python will use boxed representation for X1.
 119
 120 This is equivalent to
 121
 122 (let ((x nil))
 123   (setq x 0d0)
 124   ;; use of X as DOUBLE-FLOAT
 125 )
 126
 127 The initial binding is effectless, and without it X is of type
 128 DOUBLE-FLOAT. Unhopefully, IR1 does not optimize away effectless
 129 SETs/bindings, and IR2 does not perform type inference.
 130 --------------------------------------------------------------------------------
 131 #9
 132 (defun foo (x)
 133   (if (= (cond ((irgh x) 0)
 134                ((buh x) 1)
 135                (t 2))
 136          0)
 137       :yes
 138       :no))
 139
 140 This code could be optimized to
 141
 142 (defun foo (x)
 143   (cond ((irgh x) :yes)
 144         ((buh x) :no)
 145         (t :no)))
 146 --------------------------------------------------------------------------------
 147 #10
 148 Reported by Robert E. Brown on sbcl-devel 2003-01-21:
 149
 150 (defstruct trace-info
 151   (foo nil :type (or null function)))
 152
 153 (defun bad (x)
 154   (declare (optimize (speed 3) (safety 0)))
 155   (let ((wow (trace-info-foo x)))
 156     (or (not wow) (funcall wow 1 2 3))))
 157
 158 Python does not derive that FUNCALL is applied to FUNCTION.
 159
 160 (OR ...) is equivalent to
 161
 162 (LET ((G2407 (NOT WOW)))
 163   (IF G2407 G2407 (FUNCALL WOW 1 2 3)))
 164
 165 or to
 166
 167 (LET ((G2407 (IF WOW NIL T)))
 168   (IF G2407 G2407 (FUNCALL WOW 1 2 3)))
 169
 170 so this is similar to #9.
 171 --------------------------------------------------------------------------------