- `(let ((src ,src)
- (src-start (* ,src-start sb!vm:byte-bits))
- (dst ,dst)
- (dst-start (* ,dst-start sb!vm:byte-bits))
- (dst-end (* ,dst-end sb!vm:byte-bits)))
- (let ((length (- dst-end dst-start)))
- (etypecase src
- (system-area-pointer
- (etypecase dst
- (system-area-pointer
- (system-area-copy src src-start dst dst-start length))
- ((simple-unboxed-array (*))
- (copy-from-system-area src src-start
- dst (+ dst-start ,vector-data-bit-offset)
- length))))
- ((simple-unboxed-array (*))
- (etypecase dst
- (system-area-pointer
- (copy-to-system-area src (+ src-start ,vector-data-bit-offset)
- dst dst-start
- length))
- ((simple-unboxed-array (*))
- (bit-bash-copy src (+ src-start ,vector-data-bit-offset)
- dst (+ dst-start ,vector-data-bit-offset)
- length))))))))
+
+ ;; new version
+ ;;
+ ;; FIXME: CMU CL had a hairier implementation of this. It had the
+ ;; small problem that it didn't work for large (>16M) values of
+ ;; SRC-START or DST-START. However, it might have been more
+ ;; efficient. In particular, I haven't checked how much the foreign
+ ;; function call costs us here. My guess is that if the overhead is
+ ;; acceptable for SQRT and COS, it's acceptable here, but this
+ ;; should probably be checked. -- WHN
+ (once-only ((dst-start dst-start))
+ `(flet ((sap (thing)
+ (etypecase thing
+ (system-area-pointer thing)
+ ((simple-unboxed-array (*)) (vector-sap thing)))))
+ (declare (inline sap))
+ (without-gcing
+ (memmove (sap+ (sap ,dst) ,dst-start)
+ (sap+ (sap ,src) ,src-start)
+ (- ,dst-end ,dst-start))))))