From d1355f6b79af346f05cf21c18637e269e0b499a1 Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Fri, 29 Oct 2004 09:00:35 +0000 Subject: [PATCH] 0.8.16.14: External format support ... not latin9, though -- need to think about how that might work in a character-poor sbcl. ... delete accented characters in comments from package-locks.impure.lisp -- Something Will Have To Be Done This patch brought to you by the letters U, T, F and the number 8. --- CREDITS | 8 +- NEWS | 4 + package-data-list.lisp-expr | 2 +- src/code/fd-stream.lisp | 466 +++++++++++++++++++++++++++++++++++--- src/code/host-c-call.lisp | 30 +++ src/code/target-c-call.lisp | 99 ++++++++ src/code/target-load.lisp | 20 +- src/compiler/fndb.lisp | 2 +- src/runtime/runtime.c | 3 + tests/package-locks.impure.lisp | 2 +- tools-for-build/grovel-headers.c | 4 + tools-for-build/ldso-stubs.lisp | 1 + version.lisp-expr | 2 +- 13 files changed, 603 insertions(+), 40 deletions(-) diff --git a/CREDITS b/CREDITS index 59f39eb..5c05a17 100644 --- a/CREDITS +++ b/CREDITS @@ -557,6 +557,11 @@ Miles Egan: He creates binary packages of SBCL releases for Red Hat and other (which?) platforms. +Andreas Fuchs: + He provides infrastructure for monitoring build and performance + regressions of SBCL. He assisted with the integration of the + Unicode work. + Nathan Froyd: He has fixed various bugs, and also done a lot of internal cleanup, not visible at the user level but important for @@ -579,7 +584,8 @@ Espen S Johnsen: Teemu Kalvas: He worked on Unicode support for SBCL, including parsing the Unicode - character database and restoring the FAST-READ-CHAR optimization. + character database, restoring the FAST-READ-CHAR optimization and + developing external format support. Frederik Kuivinen: He showed how to implement the DEBUG-RETURN functionality. diff --git a/NEWS b/NEWS index e880cfa..09d4da5 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,8 @@ changes in sbcl-0.8.17 relative to sbcl-0.8.16: + * the system now has rudimentary external-format support; the + primary user-visible change at this time is that characters with + the high bit set (such as lower-case-e-acute) will print correctly + to a terminal in a UTF-8 environment. * minor incompatible change: BASE-CHAR no longer names a class; however, CHARACTER continues to do so, as required by ANSI. * minor incompatible change: SB-DEBUG:*DEBUG-PRINT-FOO* variables diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index c04bdf1..8617574 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1900,7 +1900,7 @@ no guarantees of interface stability." "EUSERS" "EVICEERR" "EVICEOP" "EWOULDBLOCK" "EXDEV" "FD-ISSET" "FD-SET" "LTCHARS" "UNIX-FAST-SELECT" - "UNIX-FILE-KIND" "UNIX-KILL" + "UNIX-FILE-KIND" "UNIX-KILL" "CODESET" "TCSETPGRP" "FD-ZERO" "FD-CLR" "CHECK" "UNIX-RESOLVE-LINKS" "FD-SETSIZE" "TCGETPGRP" "UNIX-FAST-GETRUSAGE" "UNIX-SIMPLIFY-PATHNAME" "UNIX-KILLPG" diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index fb2c9ff..a6fdf1a 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -86,7 +86,9 @@ ;; timeout specified for this stream, or NIL if none (timeout nil :type (or index null)) ;; pathname of the file this stream is opened to (returned by PATHNAME) - (pathname nil :type (or pathname null))) + (pathname nil :type (or pathname null)) + (external-format :default) + (output-bytes #'ill-out :type function)) (def!method print-object ((fd-stream file-stream) stream) (declare (type stream stream)) (print-unreadable-object (fd-stream stream :type t :identity t) @@ -197,6 +199,32 @@ (frob-output stream (fd-stream-obuf-sap stream) 0 length t) (setf (fd-stream-obuf-tail stream) 0)))) +(defmacro output-wrapper/variable-width ((stream size buffering) + &body body) + (let ((stream-var (gensym))) + `(let ((,stream-var ,stream) + (size ,size)) + ,(unless (eq (car buffering) :none) + `(when (< (fd-stream-obuf-length ,stream-var) + (+ (fd-stream-obuf-tail ,stream-var) + size)) + (flush-output-buffer ,stream-var))) + ,(unless (eq (car buffering) :none) + `(when (> (fd-stream-ibuf-tail ,stream-var) + (fd-stream-ibuf-head ,stream-var)) + (file-position ,stream-var (file-position ,stream-var)))) + + ,@body + (incf (fd-stream-obuf-tail ,stream-var) size) + ,(ecase (car buffering) + (:none + `(flush-output-buffer ,stream-var)) + (:line + `(when (eq (char-code byte) (char-code #\Newline)) + (flush-output-buffer ,stream-var))) + (:full)) + (values)))) + (defmacro output-wrapper ((stream size buffering) &body body) (let ((stream-var (gensym))) `(let ((,stream-var ,stream)) @@ -221,6 +249,32 @@ (:full)) (values)))) +(defmacro def-output-routines/variable-width ((name-fmt size external-format + &rest bufferings) + &body body) + (declare (optimize (speed 1))) + (cons 'progn + (mapcar + (lambda (buffering) + (let ((function + (intern (let ((*print-case* :upcase)) + (format nil name-fmt (car buffering)))))) + `(progn + (defun ,function (stream byte) + (output-wrapper/variable-width (stream ,size ,buffering) + ,@body)) + (setf *output-routines* + (nconc *output-routines* + ',(mapcar + (lambda (type) + (list type + (car buffering) + function + 1 + external-format)) + (cdr buffering))))))) + bufferings))) + ;;; Define output routines that output numbers SIZE bytes long for the ;;; given bufferings. Use BODY to do the actual output. (defmacro def-output-routines ((name-fmt size &rest bufferings) &body body) @@ -242,7 +296,8 @@ (list type (car buffering) function - size)) + size + nil)) (cdr buffering))))))) bufferings))) @@ -389,6 +444,8 @@ :from-end t :start start :end end)))) + (if (and (typep thing 'base-string) + (eq (fd-stream-external-format stream) :latin-1)) (ecase (fd-stream-buffering stream) (:full (output-raw-bytes stream thing start end)) @@ -398,6 +455,13 @@ (flush-output-buffer stream))) (:none (frob-output stream thing start end nil))) + (ecase (fd-stream-buffering stream) + (:full (funcall (fd-stream-output-bytes stream) + stream thing nil start end)) + (:line (funcall (fd-stream-output-bytes stream) + stream thing last-newline start end)) + (:none (funcall (fd-stream-output-bytes stream) + stream thing t start end)))) (if last-newline (setf (fd-stream-char-pos stream) (- end last-newline 1)) @@ -409,17 +473,38 @@ (:none (frob-output stream thing start end nil)))))) +(defvar *external-formats* () + #!+sb-doc + "List of all available external formats. Each element is a list of the + element-type, string input function name, character input function name, + and string output function name.") + ;;; Find an output routine to use given the type and buffering. Return ;;; as multiple values the routine, the real type transfered, and the ;;; number of bytes per element. -(defun pick-output-routine (type buffering) +(defun pick-output-routine (type buffering &optional external-format) + (when (subtypep type 'character) + (dolist (entry *external-formats*) + (when (member external-format (first entry)) + (return-from pick-output-routine + (values (symbol-function (nth (ecase buffering + (:none 4) + (:line 5) + (:full 6)) + entry)) + 'character + 1 + (symbol-function (fourth entry)) + (first (first entry))))))) (dolist (entry *output-routines*) - (when (and (subtypep type (car entry)) - (eq buffering (cadr entry))) + (when (and (subtypep type (first entry)) + (eq buffering (second entry)) + (or (not (fifth entry)) + (eq external-format (fifth entry)))) (return-from pick-output-routine - (values (symbol-function (caddr entry)) - (car entry) - (cadddr entry))))) + (values (symbol-function (third entry)) + (first entry) + (fourth entry))))) ;; KLUDGE: dealing with the buffering here leads to excessive code ;; explosion. ;; @@ -561,6 +646,31 @@ (return)) (frob-input ,stream-var))))) +(defmacro input-wrapper/variable-width ((stream bytes eof-error eof-value) + &body read-forms) + (let ((stream-var (gensym)) + (element-var (gensym))) + `(let ((,stream-var ,stream) + (size nil)) + (if (fd-stream-unread ,stream-var) + (prog1 + (fd-stream-unread ,stream-var) + (setf (fd-stream-unread ,stream-var) nil) + (setf (fd-stream-listen ,stream-var) nil)) + (let ((,element-var + (catch 'eof-input-catcher + (input-at-least ,stream-var 1) + (let* ((byte (sap-ref-8 (fd-stream-ibuf-sap ,stream-var) + (fd-stream-ibuf-head ,stream-var)))) + (setq size ,bytes) + (input-at-least ,stream-var size) + (locally ,@read-forms))))) + (cond (,element-var + (incf (fd-stream-ibuf-head ,stream-var) size) + ,element-var) + (t + (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) + ;;; a macro to wrap around all input routines to handle EOF-ERROR noise (defmacro input-wrapper ((stream bytes eof-error eof-value) &body read-forms) (let ((stream-var (gensym)) @@ -581,6 +691,19 @@ (t (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) +(defmacro def-input-routine/variable-width (name + (type external-format size sap head) + &rest body) + `(progn + (defun ,name (stream eof-error eof-value) + (input-wrapper/variable-width (stream ,size eof-error eof-value) + (let ((,sap (fd-stream-ibuf-sap stream)) + (,head (fd-stream-ibuf-head stream))) + ,@body))) + (setf *input-routines* + (nconc *input-routines* + (list (list ',type ',name 1 ',external-format)))))) + (defmacro def-input-routine (name (type size sap head) &rest body) @@ -592,7 +715,7 @@ ,@body))) (setf *input-routines* (nconc *input-routines* - (list (list ',type ',name ',size)))))) + (list (list ',type ',name ',size nil)))))) ;;; STREAM-IN routine for reading a string char (def-input-routine input-character @@ -629,16 +752,29 @@ ((signed-byte 32) 4 sap head) (signed-sap-ref-32 sap head)) + + ;;; Find an input routine to use given the type. Return as multiple ;;; values the routine, the real type transfered, and the number of -;;; bytes per element. -(defun pick-input-routine (type) +;;; bytes per element (and for character types string input routine). +(defun pick-input-routine (type &optional external-format) + (when (subtypep type 'character) + (dolist (entry *external-formats*) + (when (member external-format (first entry)) + (return-from pick-input-routine + (values (symbol-function (third entry)) + 'character + 1 + (symbol-function (second entry)) + (first (first entry))))))) (dolist (entry *input-routines*) - (when (subtypep type (car entry)) + (when (and (subtypep type (first entry)) + (or (not (fourth entry)) + (eq external-format (fourth entry)))) (return-from pick-input-routine - (values (symbol-function (cadr entry)) - (car entry) - (caddr entry))))) + (values (symbol-function (second entry)) + (first entry) + (third entry))))) ;; FIXME: let's do it the hard way, then (but ignore things like ;; endianness, efficiency, and the necessary coupling between these ;; and the output routines). -- CSR, 2004-02-09 @@ -766,6 +902,247 @@ (fd-stream-ibuf-head stream) new-head (fd-stream-ibuf-tail stream) (+ count new-head)) count))) + +(defmacro define-external-format (external-format size out-expr in-expr) + (let* ((name (first external-format)) + (out-function (intern (let ((*print-case* :upcase)) + (format nil "OUTPUT-BYTES/~A" name)))) + (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" name)) + (in-function (intern (let ((*print-case* :upcase)) + (format nil "FD-STREAM-READ-N-CHARACTERS/~A" + name)))) + (in-char-function (intern (let ((*print-case* :upcase)) + (format nil "INPUT-CHAR/~A" name))))) + `(progn + (defun ,out-function (fd-stream string flush-p start end) + (let ((start (or start 0)) + (end (or end (length string)))) + (declare (type index start end)) + (when (> (fd-stream-ibuf-tail fd-stream) + (fd-stream-ibuf-head fd-stream)) + (file-position fd-stream (file-position fd-stream))) + (when (< end start) + (error ":END before :START!")) + (do () + ((= end start)) + (setf (fd-stream-obuf-tail fd-stream) + (do* ((len (fd-stream-obuf-length fd-stream)) + (sap (fd-stream-obuf-sap fd-stream)) + (tail (fd-stream-obuf-tail fd-stream))) + ((or (= start end) (< (- len tail) 4)) tail) + (let* ((byte (aref string start)) + (bits (char-code byte))) + ,out-expr + (incf tail ,size) + (incf start)))) + (when (< start end) + (flush-output-buffer fd-stream))) + (when flush-p + (flush-output-buffer fd-stream)))) + (def-output-routines (,format + ,size + (:none character) + (:line character) + (:full character)) + (if (char= byte #\Newline) + (setf (fd-stream-char-pos stream) 0) + (incf (fd-stream-char-pos stream))) + (let ((bits (char-code byte)) + (sap (fd-stream-obuf-sap stream)) + (tail (fd-stream-obuf-tail stream))) + ,out-expr)) + (defun ,in-function (stream buffer start requested eof-error-p + &aux (total-copied 0)) + (declare (type file-stream stream)) + (declare (type index start requested total-copied)) + (let ((unread (fd-stream-unread stream))) + (when unread + (setf (aref buffer start) unread) + (setf (fd-stream-unread stream) nil) + (setf (fd-stream-listen stream) nil) + (incf total-copied))) + (do () + (nil) + (let* ((head (fd-stream-ibuf-head stream)) + (tail (fd-stream-ibuf-tail stream)) + (sap (fd-stream-ibuf-sap stream))) + (declare (type index head tail)) + ;; Copy data from stream buffer into user's buffer. + (do () + ((or (= tail head) (= requested total-copied))) + (let* ((byte (sap-ref-8 sap head))) + (when (> ,size (- tail head)) + (return)) + (setf (aref buffer (+ start total-copied)) ,in-expr) + (incf total-copied) + (incf head ,size))) + (setf (fd-stream-ibuf-head stream) head) + ;; Maybe we need to refill the stream buffer. + (cond ( ;; If there were enough data in the stream buffer, we're done. + (= total-copied requested) + (return total-copied)) + ( ;; If EOF, we're done in another way. + (zerop (refill-fd-stream-buffer stream)) + (if eof-error-p + (error 'end-of-file :stream stream) + (return total-copied))) + ;; Otherwise we refilled the stream buffer, so fall + ;; through into another pass of the loop. + )))) + (def-input-routine ,in-char-function (character ,size sap head) + (let ((byte (sap-ref-8 sap head))) + ,in-expr)) + (setf *external-formats* + (cons '(,external-format ,in-function ,in-char-function ,out-function + ,@(mapcar #'(lambda (buffering) + (intern (let ((*print-case* :upcase)) + (format nil format buffering)))) + '(:none :line :full))) + *external-formats*))))) + +(defmacro define-external-format/variable-width (external-format out-size-expr + out-expr in-size-expr in-expr) + (let* ((name (first external-format)) + (out-function (intern (let ((*print-case* :upcase)) + (format nil "OUTPUT-BYTES/~A" name)))) + (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" name)) + (in-function (intern (let ((*print-case* :upcase)) + (format nil "FD-STREAM-READ-N-CHARACTERS/~A" + name)))) + (in-char-function (intern (let ((*print-case* :upcase)) + (format nil "INPUT-CHAR/~A" name))))) + `(progn + (defun ,out-function (fd-stream string flush-p start end) + (let ((start (or start 0)) + (end (or end (length string)))) + (declare (type index start end)) + (when (> (fd-stream-ibuf-tail fd-stream) + (fd-stream-ibuf-head fd-stream)) + (file-position fd-stream (file-position fd-stream))) + (when (< end start) + (error ":END before :START!")) + (do () + ((= end start)) + (setf (fd-stream-obuf-tail fd-stream) + (do* ((len (fd-stream-obuf-length fd-stream)) + (sap (fd-stream-obuf-sap fd-stream)) + (tail (fd-stream-obuf-tail fd-stream))) + ((or (= start end) (< (- len tail) 4)) tail) + (let* ((byte (aref string start)) + (bits (char-code byte)) + (size ,out-size-expr)) + ,out-expr + (incf tail size) + (incf start)))) + (when (< start end) + (flush-output-buffer fd-stream))) + (when flush-p + (flush-output-buffer fd-stream)))) + (def-output-routines/variable-width (,format + ,out-size-expr + ,external-format + (:none character) + (:line character) + (:full character)) + (if (char= byte #\Newline) + (setf (fd-stream-char-pos stream) 0) + (incf (fd-stream-char-pos stream))) + (let ((bits (char-code byte)) + (sap (fd-stream-obuf-sap stream)) + (tail (fd-stream-obuf-tail stream))) + ,out-expr)) + (defun ,in-function (stream buffer start requested eof-error-p + &aux (total-copied 0)) + (declare (type file-stream stream)) + (declare (type index start requested total-copied)) + (let ((unread (fd-stream-unread stream))) + (when unread + (setf (aref buffer start) unread) + (setf (fd-stream-unread stream) nil) + (setf (fd-stream-listen stream) nil) + (incf total-copied))) + (do () + (nil) + (let* ((head (fd-stream-ibuf-head stream)) + (tail (fd-stream-ibuf-tail stream)) + (sap (fd-stream-ibuf-sap stream))) + (declare (type index head tail)) + ;; Copy data from stream buffer into user's buffer. + (do () + ((or (= tail head) (= requested total-copied))) + (let* ((byte (sap-ref-8 sap head)) + (size ,in-size-expr)) + (when (> size (- tail head)) + (return)) + (setf (aref buffer (+ start total-copied)) ,in-expr) + (incf total-copied) + (incf head size))) + (setf (fd-stream-ibuf-head stream) head) + ;; Maybe we need to refill the stream buffer. + (cond ( ;; If there were enough data in the stream buffer, we're done. + (= total-copied requested) + (return total-copied)) + ( ;; If EOF, we're done in another way. + (zerop (refill-fd-stream-buffer stream)) + (if eof-error-p + (error 'end-of-file :stream stream) + (return total-copied))) + ;; Otherwise we refilled the stream buffer, so fall + ;; through into another pass of the loop. + )))) + (def-input-routine/variable-width ,in-char-function (character + ,external-format + ,in-size-expr + sap head) + (let ((byte (sap-ref-8 sap head))) + ,in-expr)) + (setf *external-formats* + (cons '(,external-format ,in-function ,in-char-function ,out-function + ,@(mapcar #'(lambda (buffering) + (intern (let ((*print-case* :upcase)) + (format nil format buffering)))) + '(:none :line :full))) + *external-formats*))))) + +(define-external-format (:latin-1 :latin1 :iso-8859-1 + ;; FIXME: shouldn't ASCII-like things have an + ;; extra typecheck for 7-bitness? + :ascii :us-ascii :ansi_x3.4-1968) + 1 + (setf (sap-ref-8 sap tail) bits) + (code-char byte)) + +(define-external-format/variable-width (:utf-8 :utf8) + (let ((bits (char-code byte))) + (cond ((< bits #x80) 1) + ((< bits #x800) 2) + ((< bits #x10000) 3) + (t 4))) + (ecase size + (1 (setf (sap-ref-8 sap tail) bits)) + (2 (setf (sap-ref-8 sap tail) (logior #xc0 (ldb (byte 5 6) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 0) bits)))) + (3 (setf (sap-ref-8 sap tail) (logior #xe0 (ldb (byte 4 12) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 0) bits)))) + (4 (setf (sap-ref-8 sap tail) (logior #xf0 (ldb (byte 3 18) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 12) bits)) + (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits))))) + (cond ((< byte #x80) 1) + ((< byte #xe0) 2) + ((< byte #xf0) 3) + (t 4)) + (code-char (ecase size + (1 byte) + (2 (dpb byte (byte 5 6) (sap-ref-8 sap (1+ head)))) + (3 (dpb byte (byte 4 12) + (dpb (sap-ref-8 sap (1+ head)) (byte 6 6) + (sap-ref-8 sap (+ 2 head))))) + (4 (dpb byte (byte 3 18) + (dpb (sap-ref-8 sap (1+ head)) (byte 6 12) + (dpb (sap-ref-8 sap (+ 2 head)) (byte 6 6) + (sap-ref-8 sap (+ 3 head))))))))) ;;;; utility functions (misc routines, etc) @@ -793,9 +1170,28 @@ (push (fd-stream-ibuf-sap fd-stream) *available-buffers*) (setf (fd-stream-ibuf-sap fd-stream) nil)) + (when (and character-stream-p + (eq (fd-stream-external-format fd-stream) :default)) + (setf (fd-stream-external-format fd-stream) + (intern (or (alien-funcall + (extern-alien "nl_langinfo" + (function c-string int)) + sb!unix:codeset) + "LATIN-1") + "KEYWORD"))) + (dolist (entry *external-formats* + (setf (fd-stream-external-format fd-stream) :latin-1)) + (when (member (fd-stream-external-format fd-stream) (first entry)) + (return))) + (when input-p - (multiple-value-bind (routine type size) - (pick-input-routine target-type) + (multiple-value-bind (routine type size read-n-characters + normalized-external-format) + (pick-input-routine target-type + (fd-stream-external-format fd-stream)) + (when normalized-external-format + (setf (fd-stream-external-format fd-stream) + normalized-external-format)) (unless routine (error "could not find any input routine for ~S" target-type)) (setf (fd-stream-ibuf-sap fd-stream) (next-available-buffer)) @@ -807,7 +1203,10 @@ (setf (fd-stream-in fd-stream) #'ill-in (fd-stream-bin fd-stream) routine)) (when (eql size 1) - (setf (fd-stream-n-bin fd-stream) #'fd-stream-read-n-bytes) + (setf (fd-stream-n-bin fd-stream) + (if character-stream-p + read-n-characters + #'fd-stream-read-n-bytes)) (when (and buffer-p ;; We only create this buffer for streams of type ;; (unsigned-byte 8). Because there's no buffer, the @@ -830,8 +1229,14 @@ (setf input-type type))) (when output-p - (multiple-value-bind (routine type size) - (pick-output-routine target-type (fd-stream-buffering fd-stream)) + (multiple-value-bind (routine type size output-bytes + normalized-external-format) + (pick-output-routine target-type + (fd-stream-buffering fd-stream) + (fd-stream-external-format fd-stream)) + (when normalized-external-format + (setf (fd-stream-external-format fd-stream) + normalized-external-format)) (unless routine (error "could not find any output routine for ~S buffered ~S" (fd-stream-buffering fd-stream) @@ -839,13 +1244,15 @@ (setf (fd-stream-obuf-sap fd-stream) (next-available-buffer)) (setf (fd-stream-obuf-length fd-stream) bytes-per-buffer) (setf (fd-stream-obuf-tail fd-stream) 0) - (if (subtypep type 'character) + (when character-stream-p + (setf (fd-stream-output-bytes fd-stream) output-bytes)) + (if character-stream-p (setf (fd-stream-out fd-stream) routine (fd-stream-bout fd-stream) #'ill-bout) (setf (fd-stream-out fd-stream) (or (if (eql size 1) - (pick-output-routine 'base-char - (fd-stream-buffering fd-stream))) + (pick-output-routine + 'base-char (fd-stream-buffering fd-stream))) #'ill-out) (fd-stream-bout fd-stream) routine)) (setf (fd-stream-sout fd-stream) @@ -1129,6 +1536,7 @@ (output nil output-p) (element-type 'base-char) (buffering :full) + (external-format :default) timeout file original @@ -1152,6 +1560,7 @@ :delete-original delete-original :pathname pathname :buffering buffering + :external-format external-format :timeout timeout))) (set-fd-stream-routines stream element-type input output input-buffer-p) (when (and auto-close (fboundp 'finalize)) @@ -1215,8 +1624,6 @@ :IF-DOES-NOT-EXIST - one of :ERROR, :CREATE or NIL See the manual for details." - (declare (ignore external-format)) ; FIXME: CHECK-TYPE? WARN-if-not? - ;; Calculate useful stuff. (multiple-value-bind (input output mask) (case direction @@ -1341,6 +1748,7 @@ :input input :output output :element-type element-type + :external-format external-format :file namestring :original original :delete-original delete-original @@ -1455,7 +1863,9 @@ (string (length object)))) (defun stream-external-format (stream) - (declare (type file-stream stream) (ignore stream)) + (declare (type file-stream stream)) #!+sb-doc - "Return :DEFAULT." - :default) + "Return the actual external format for file-streams, otherwise :DEFAULT." + (if (typep stream 'file-stream) + (fd-stream-external-format stream) + :default)) diff --git a/src/code/host-c-call.lisp b/src/code/host-c-call.lisp index b7730f1..4ad089a 100644 --- a/src/code/host-c-call.lisp +++ b/src/code/host-c-call.lisp @@ -39,4 +39,34 @@ ((alien (* char)) (alien-sap ,value)) (simple-base-string (vector-sap ,value)))) +(/show0 "host-c-call.lisp 42") + +(define-alien-type-class (utf8-string :include pointer :include-args (to))) + +(define-alien-type-translator utf8-string () + (make-alien-utf8-string-type + :to (parse-alien-type 'char (sb!kernel:make-null-lexenv)))) + +(define-alien-type-method (utf8-string :unparse) (type) + (declare (ignore type)) + 'utf8-string) + +(define-alien-type-method (utf8-string :lisp-rep) (type) + (declare (ignore type)) + '(or simple-string null (alien (* char)))) + +(define-alien-type-method (utf8-string :naturalize-gen) (type alien) + (declare (ignore type)) + `(if (zerop (sap-int ,alien)) + nil + (%naturalize-utf8-string ,alien))) + +(define-alien-type-method (utf8-string :deport-gen) (type value) + (declare (ignore type)) + `(etypecase ,value + (null (int-sap 0)) + ((alien (* char)) (alien-sap ,value)) + (simple-base-string (vector-sap ,value)) + (simple-string (vector-sap (%deport-utf8-string ,value))))) + (/show0 "host-c-call.lisp end of file") diff --git a/src/code/target-c-call.lisp b/src/code/target-c-call.lisp index 3292453..e35489e 100644 --- a/src/code/target-c-call.lisp +++ b/src/code/target-c-call.lisp @@ -35,6 +35,10 @@ (define-alien-type-translator void () (parse-alien-type '(values) (sb!kernel:make-null-lexenv))) +;;; FIXME: %NATURALIZE-C-STRING (and the UTF8 siblings below) would +;;; appear to be vulnerable to the lisp string moving from underneath +;;; them if the world undergoes a GC, possibly triggered by another +;;; thread. Ugh. (defun %naturalize-c-string (sap) (declare (type system-area-pointer sap)) (locally @@ -48,3 +52,98 @@ sb!vm:n-word-bits) (* length sb!vm:n-byte-bits)) result)))) + +(defun %naturalize-utf8-string (sap) + (declare (type system-area-pointer sap)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let ((length (do* ((offset 0) + (byte (sap-ref-8 sap offset) (sap-ref-8 sap offset)) + (index 0 (1+ index))) + ((zerop byte) index) + (declare (type fixnum offset index)) + (cond + ;; FIXME: Here, and below, we don't defend + ;; against malformed utf-8 with any degree of + ;; rigour. + ((< byte #x80) (incf offset)) + ((< byte #xe0) (incf offset 2)) + ((< byte #xf0) (incf offset 3)) + (t (incf offset 4)))))) + (let ((result (make-string length :element-type 'character))) + (do* ((offset 0) + (byte (sap-ref-8 sap offset) (sap-ref-8 sap offset)) + (index 0 (1+ index))) + ((>= index length) result) + (declare (type fixnum offset index)) + (setf (char result index) + (cond + ((< byte #x80) + (prog1 (code-char byte) (incf offset))) + ((< byte #xe0) + (prog1 (code-char (dpb byte (byte 5 6) + (sap-ref-8 sap (1+ offset)))) + (incf offset 2))) + ((< byte #xf0) + (prog1 (code-char + (dpb byte (byte 4 12) + (dpb (sap-ref-8 sap (1+ offset)) (byte 6 6) + (sap-ref-8 sap (+ 2 offset))))) + (incf offset 3))) + (t + (prog1 + (code-char + (dpb byte (byte 3 18) + (dpb (sap-ref-8 sap (1+ offset)) (byte 6 12) + (dpb (sap-ref-8 sap (+ 2 offset)) (byte 6 6) + (sap-ref-8 sap (+ 3 offset)))))) + (incf offset 4)))))))))) + +(defun %deport-utf8-string (string) + (declare (type simple-string string)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let ((length (1+ (do* ((offset 0) + (length (length string)) + (index 0 (1+ index))) + ((= index length) offset) + (declare (type fixnum offset)) + (let ((bits (char-code (char string index)))) + (cond + ((< bits #x80) (incf offset 1)) + ((< bits #x800) (incf offset 2)) + ((< bits #x10000) (incf offset 3)) + (t (incf offset 4)))))))) + (let ((vector (make-array length :element-type '(unsigned-byte 8) + :initial-element 0))) + (do* ((offset 0) + (length (length string)) + (index 0 (1+ index))) + ((= index length) vector) + (declare (type fixnum offset)) + (let ((bits (char-code (char string index)))) + (cond + ((< bits #x80) + (setf (aref vector offset) bits) + (incf offset)) + ((< bits #x800) + (setf (aref vector offset) (logior #xc0 (ldb (byte 5 6) bits))) + (setf (aref vector (1+ offset)) + (logior #x80 (ldb (byte 6 0) bits))) + (incf offset 2)) + ((< bits #x10000) + (setf (aref vector offset) (logior #xe0 (ldb (byte 4 12) bits))) + (setf (aref vector (1+ offset)) + (logior #x80 (ldb (byte 6 6) bits))) + (setf (aref vector (+ offset 2)) + (logior #x80 (ldb (byte 6 0) bits))) + (incf offset 3)) + (t + (setf (aref vector offset) (logior #xf0 (ldb (byte 3 18) bits))) + (setf (aref vector (1+ offset)) + (logior #x80 (ldb (byte 6 12) bits))) + (setf (aref vector (+ offset 2)) + (logior #x80 (ldb (byte 6 6) bits))) + (setf (aref vector (+ offset 3)) + (logior #x80 (ldb (byte 6 0) bits))) + (incf offset 4))))))))) diff --git a/src/code/target-load.lisp b/src/code/target-load.lisp index 87403ba..f30b8f1 100644 --- a/src/code/target-load.lisp +++ b/src/code/target-load.lisp @@ -80,14 +80,20 @@ :element-type '(unsigned-byte 8)) (load-as-fasl stream verbose print))) (t - (let ((first-line (with-open-file (stream truename :direction :input) - (read-line stream nil))) - (fhsss *fasl-header-string-start-string*)) + (let* ((fhsss *fasl-header-string-start-string*) + (first-line (make-array (length fhsss) + :element-type '(unsigned-byte 8))) + (read-length + (with-open-file (stream truename + :direction :input + :element-type '(unsigned-byte 8)) + (read-sequence first-line stream)))) (cond - ((and first-line - (>= (length (the simple-string first-line)) - (length fhsss)) - (string= first-line fhsss :end1 (length fhsss))) + ((and (= read-length (length fhsss)) + (do ((i 0 (1+ i))) + ((= i read-length) t) + (when (/= (char-code (aref fhsss i)) (aref first-line i)) + (return)))) (internal-load pathname truename if-does-not-exist verbose print :binary)) (t diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp index 8ab1e06..9bc3d95 100644 --- a/src/compiler/fndb.lisp +++ b/src/compiler/fndb.lisp @@ -1192,7 +1192,7 @@ :rename-and-delete :overwrite :append :supersede nil)) (:if-does-not-exist (member :error :create nil)) - (:external-format (member :default))) + (:external-format keyword)) (or stream null)) (defknown rename-file (pathname-designator filename) diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c index 545fb35..6a9765b 100644 --- a/src/runtime/runtime.c +++ b/src/runtime/runtime.c @@ -28,6 +28,7 @@ #include #include #include +#include #if defined(SVR4) || defined(__linux__) #include @@ -189,6 +190,8 @@ main(int argc, char *argv[], char *envp[]) lispobj initial_function; + setlocale(LC_ALL, ""); + /* KLUDGE: os_vm_page_size is set by os_init(), and on some * systems (e.g. Alpha) arch_init() needs need os_vm_page_size, so * it must follow os_init(). -- WHN 2000-01-26 */ diff --git a/tests/package-locks.impure.lisp b/tests/package-locks.impure.lisp index 427a8ec..12e533f 100644 --- a/tests/package-locks.impure.lisp +++ b/tests/package-locks.impure.lisp @@ -450,7 +450,7 @@ (assert (trace test:function :break t)) ;;;; No bogus violations from defclass with accessors in a locked -;;;; package. Reported by by François-René Rideau. +;;;; package. Reported by by Francois-Rene Rideau. (assert (package-locked-p :sb-gray)) (multiple-value-bind (fun compile-errors) (ignore-errors diff --git a/tools-for-build/grovel-headers.c b/tools-for-build/grovel-headers.c index dc50054..b117233 100644 --- a/tools-for-build/grovel-headers.c +++ b/tools-for-build/grovel-headers.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "genesis/config.h" @@ -72,6 +73,9 @@ main(int argc, char *argv[]) printf("(in-package \"SB!UNIX\")\n\n"); + printf(";;; langinfo\n"); + defconstant("codeset", CODESET); + printf(";;; types, types, types\n"); DEFTYPE("clock-t", clock_t); DEFTYPE("dev-t", dev_t); diff --git a/tools-for-build/ldso-stubs.lisp b/tools-for-build/ldso-stubs.lisp index bf5b04e..25bc891 100644 --- a/tools-for-build/ldso-stubs.lisp +++ b/tools-for-build/ldso-stubs.lisp @@ -198,6 +198,7 @@ ldso_stub__ ## fct: ; \\ "malloc" "memmove" "mkdir" + "nl_langinfo" "open" "opendir" "pipe" diff --git a/version.lisp-expr b/version.lisp-expr index c1f55ed..1a2b82e 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.16.13" +"0.8.16.14" -- 1.7.10.4