OPTIMIZATIONS

   1 (defun mysl (s)
   2     (declare (simple-string s))
   3     (declare (optimize (speed 3) (safety 0) (debug 0)))
   4     (let ((c 0))
   5       (declare (fixnum c))
   6       (dotimes (i (length s))
   7         (when (eql (aref s i) #\1)
   8           (incf c)))
   9       c))
  10
  11 * On X86 the loop index I is represented as a tagged integer.
  12
  13 * EQL uses "CMP reg,reg" instead of "CMP reg,im". This causes
  14   allocation of an extra register and an extra move.
  15
  16 * Unnecessary move:
  17   3: SLOT S!11[EDX] {SB-C::VECTOR-LENGTH 1 7} => t23[EAX]
  18   4: MOVE t23[EAX] => t24[EBX]
  19
  20 --------------------------------------------------------------------------------
  21 (defun quux (v)
  22   (declare (optimize (speed 3) (safety 0) (space 2) (debug 0)))
  23   (declare (type (simple-array double-float 1) v))
  24   (let ((s 0d0))
  25     (declare (type double-float s))
  26     (dotimes (i (length v))
  27       (setq s (+ s (aref v i))))
  28     s))
  29
  30 * Python does not combine + with AREF, so it generates an extra move and
  31   allocates an extra register.
  32
  33 * On X86 Python thinks that all FP registers are directly accessible
  34   and emits a costly MOVE ... => FR1.
  35
  36 --------------------------------------------------------------------------------
  37 (defun bar (n)
  38   (declare (optimize (speed 3) (safety 0) (space 2))
  39            (type fixnum n))
  40   (let ((v (make-list n)))
  41     (setq v (make-array n))
  42     (length v)))
  43
  44 * IR1 does not optimize away (MAKE-LIST N).
  45
  46 * IR1 thinks that the type of V in (LENGTH V) is (OR LIST SIMPLE-VECTOR), not
  47   SIMPLE-VECTOR.
  48 --------------------------------------------------------------------------------
  49 (defun bar (v1 v2)
  50   (declare (optimize (speed 3) (safety 0) (space 2))
  51            (type (simple-array base-char 1) v1 v2))
  52   (dotimes (i (length v1))
  53     (setf (aref v2 i) (aref v1 i))))
  54
  55 VOP DATA-VECTOR-SET/SIMPLE-STRING V2!14[EDI] t32[EAX] t30[S2]>t33[CL]
  56                                   => t34[S2]<t35[AL]
  57         MOV     #<TN t33[CL]>, #<TN t30[S2]>
  58         MOV     BYTE PTR [EDI+EAX+1], #<TN t33[CL]>
  59         MOV     #<TN t35[AL]>, #<TN t33[CL]>
  60         MOV     #<TN t34[S2]>, #<TN t35[AL]>
  61
  62 * The value of DATA-VECTOR-SET is not used, so there is no need for the
  63   last two moves.
  64
  65 * And why two moves?
  66 --------------------------------------------------------------------------------
  67 (loop repeat 1.5)
  68
  69 uses generic arithmetic
  70 --------------------------------------------------------------------------------
  71 09:49:05 <jtra> I have found a case in those where suboptimal code is
  72   generate with nested loops, it might be moderately easy to fix that
  73 09:49:28 <jtra> see
  74   http://www.bagley.org/~doug/shootout/bench/nestedloop/nestedloop.cmucl
  75 09:50:30 <jtra> if you add declarations to dotimes, generated code is
  76   almost optimal, but most inner loops run out of registers and use
  77   memory location for iteration variable
  78
  79 ;;; -*- mode: lisp -*-
  80 ;;; $Id$
  81 ;;; http://www.bagley.org/~doug/shootout/
  82 ;;; from Friedrich Dominicus
  83
  84 (defun main ()
  85   (let ((n (parse-integer (or (car (last extensions:*command-line-strings*)) "1")))
  86         (x 0))
  87     (declare (fixnum n)
  88              (fixnum x)
  89              (optimize (speed 3) (debug 0) (safety 0)))
  90     (dotimes (a n)
  91       (dotimes (b n)
  92         (dotimes (c n)
  93           (dotimes (d n)
  94             (dotimes (e n)
  95               (dotimes (f n)
  96                 (incf x)))))))
  97    (format t "~A~%" x)))
  98 --------------------------------------------------------------------------------