2 (declare (simple-string s))
3 (declare (optimize (speed 3) (safety 0) (debug 0)))
6 (dotimes (i (length s))
7 (when (eql (aref s i) #\1)
11 * On X86 I is represented as a tagged integer.
13 * EQL uses "CMP reg,reg" instead of "CMP reg,im". This causes
14 allocation of an extra register and an extra move.
17 3: SLOT S!11[EDX] {SB-C::VECTOR-LENGTH 1 7} => t23[EAX]
18 4: MOVE t23[EAX] => t24[EBX]
20 --------------------------------------------------------------------------------
22 (declare (optimize (speed 3) (safety 0) (space 2) (debug 0)))
23 (declare (type (simple-array double-float 1) v))
25 (declare (type double-float s))
26 (dotimes (i (length v))
27 (setq s (+ s (aref v i))))
30 * Python does not combine + with AREF, so it generates an extra move and
33 * On X86 Python thinks that all FP registers are directly accessible
34 and emits costly MOVE ... => FR1.
36 --------------------------------------------------------------------------------
38 (declare (optimize (speed 3) (safety 0) (space 2))
40 (let ((v (make-list n)))
41 (setq v (make-array n))
44 * IR1 does not optimize away (MAKE-LIST N).
45 --------------------------------------------------------------------------------
47 (declare (optimize (speed 3) (safety 0) (space 2))
48 (type (simple-array base-char 1) v1 v2))
49 (dotimes (i (length v1))
50 (setf (aref v2 i) (aref v1 i))))
52 VOP DATA-VECTOR-SET/SIMPLE-STRING V2!14[EDI] t32[EAX] t30[S2]>t33[CL]
54 MOV #<TN t33[CL]>, #<TN t30[S2]>
55 MOV BYTE PTR [EDI+EAX+1], #<TN t33[CL]>
56 MOV #<TN t35[AL]>, #<TN t33[CL]>
57 MOV #<TN t34[S2]>, #<TN t35[AL]>
59 * The value of DATA-VECTOR-SET is not used, so there is no need for the
63 --------------------------------------------------------------------------------
66 uses generic arithmetic
67 --------------------------------------------------------------------------------
68 09:49:05 <jtra> I have found a case in those where suboptimal code is
69 generated with nested loops, it might be moderately easy to fix that
71 http://www.bagley.org/~doug/shootout/bench/nestedloop/nestedloop.cmucl
72 09:50:30 <jtra> if you add declarations to dotimes, generated code is
73 almost optimal, but most inner loops run out of registers and use
74 memory location for iteration variable
76 ;;; -*- mode: lisp -*-
78 ;;; http://www.bagley.org/~doug/shootout/
79 ;;; from Friedrich Dominicus
82 (let ((n (parse-integer (or (car (last extensions:*command-line-strings*)) "1")))
86 (optimize (speed 3) (debug 0) (safety 0)))
95 --------------------------------------------------------------------------------
97 (declare (optimize speed (debug 0)))
98 (if (< x 0) x (foo (1- x))))
100 SBCL generates a full call of FOO (but CMUCL does not).
101 --------------------------------------------------------------------------------
103 (declare (optimize (speed 3) (safety 0) (debug 0)))
104 (declare (type (double-float 0d0 1d0) d))
105 (loop for i fixnum from 1 to 5
106 for x1 double-float = (sin d) ;;; !!!
107 do (loop for j fixnum from 1 to 4
108 sum x1 double-float)))
110 Without the marked declaration, Python will use a boxed representation for X1.
112 This is equivalent to
116 ;; use of X as DOUBLE-FLOAT
119 The initial binding has no effect, and without it X is of type
120 DOUBLE-FLOAT. Unfortunately, IR1 does not optimize away ineffectual
121 SETs/bindings, and IR2 does not perform type inference.
122 --------------------------------------------------------------------------------
124 (if (= (cond ((irgh x) 0)
131 This code could be optimized to
134 (cond ((irgh x) :yes)
137 --------------------------------------------------------------------------------