Windows: Use overlapped I/O, CreateFile
authorDavid Lichteblau <david@lichteblau.com>
Tue, 18 Sep 2012 15:12:19 +0000 (17:12 +0200)
committerDavid Lichteblau <david@lichteblau.com>
Fri, 19 Oct 2012 16:07:24 +0000 (18:07 +0200)
Overlapped I/O is win32's asynchronous I/O mechanism, which allows
us to start an I/O operation and explicitly wait for it to finish at
a time of our choosing, such that we can simultaneously await other
events instead of blocking unconditionally.

  - Support for overlapped I/O is a per-HANDLE flag specified at
    file opening time, necessitating a switch to win32's CreateFile
    and away from the CRT's _open.

  - Wrap win32 file operations in POSIX-compatible functions, so
    that UNIX-OPEN, UNIX-READ, UNIX-WRITE, UNIX-CLOSE continue to
    work as before.  Under the hood, these now call our Lisp or C
    functions instead of versions from CRT.

  - For now, these functions still return and expect what passes as
    file descriptors in CRT.

  - INTERRUPT-THREAD is now capable of performing the interruption
    in a target thread blocked in socket I/O, indicated using an
    errno of "EINTR".  Minor changes in FD streams to retry the I/O
    operation explicitly in that case.

Does not yet include changes for console I/O, and instead still
falls back to _read and _write in that case.  Also not yet included
is interruptible non-overlapped I/O, e.g. for unnamed pipes.

Thanks to Anton Kovalenko.

contrib/sb-bsd-sockets/win32-sockets.lisp
package-data-list.lisp-expr
src/code/fd-stream.lisp
src/code/unix.lisp
src/code/win32.lisp
src/runtime/Config.x86-win32
src/runtime/print.c
src/runtime/runtime.h
src/runtime/safepoint.c
src/runtime/win32-os.c
tools-for-build/grovel-headers.c

index 7aa765a..2e73295 100644 (file)
 ;;;; package where we will redefine all of the above
 ;;;; functions, converting between HANDLES and fds
 
+(defconstant WSA_FLAG_OVERLAPPED 1)
+
 (defun socket (af type proto)
-  (let* ((handle (wsa-socket af type proto nil 0 0))
+  (let* ((handle (wsa-socket af type proto nil 0 WSA_FLAG_OVERLAPPED))
          (fd (handle->fd handle 0)))
     fd))
 
index b5fc04f..2c3552a 100644 (file)
@@ -2912,6 +2912,8 @@ SBCL itself"
                "PEEK-CONSOLE-INPUT"
                "PEEK-NAMED-PIPE"
                "READ-FILE"
+               "UNIXLIKE-CLOSE"
+               "UNIXLIKE-OPEN"
                "UNMAP-VIEW-OF-FILE"
                "WRITE-FILE"
                "WITH-PROCESS-TIMES")))
index 2f4e9e6..d6c9b4c 100644 (file)
                               ((eql errno sb!unix:ewouldblock)
                                ;; Blocking, queue or wair.
                                (queue-or-wait))
+                              ;; if interrupted on win32, just try again
+                              #!+win32 ((eql errno sb!unix:eintr))
                               (t
                                (simple-stream-perror "Couldn't write to ~s"
                                                      stream errno)))))))))))))
            (errno 0)
            (count 0))
     (tagbody
+       #!+win32
+       (go :main)
+
        ;; Check for blocking input before touching the stream if we are to
        ;; serve events: if the FD is blocking, we don't want to try an uninterruptible
        ;; read(). Regular files should never block, so we can elide the check.
        ((lambda (return-reason)
           (ecase return-reason
             ((nil))                     ; fast path normal cases
-            ((:wait-for-input) (go :wait-for-input))
+            ((:wait-for-input) (go #!-win32 :wait-for-input #!+win32 :main))
             ((:closed-flame)   (go :closed-flame))
             ((:read-error)     (go :read-error))))
         (without-interrupts
                 (setf (values count errno)
                       (sb!unix:unix-read fd (sap+ sap tail) (- length tail)))
                 (cond ((null count)
-                       #!+win32
-                       (return :read-error)
-                       #!-win32
-                       (if (eql errno sb!unix:ewouldblock)
+                       (if (eql errno
+                                #!+win32 sb!unix:eintr
+                                #!-win32 sb!unix:ewouldblock)
                            (return :wait-for-input)
                            (return :read-error)))
                       ((zerop count)
index 7a1a628..ed7be90 100644 (file)
@@ -158,6 +158,8 @@ corresponds to NAME, or NIL if there is none."
   (declare (type unix-pathname path)
            (type fixnum flags)
            (type unix-file-mode mode))
+  #!+win32 (sb!win32:unixlike-open path flags mode)
+  #!-win32
   (with-restarted-syscall (value errno)
     (int-syscall ("open" c-string int int)
                  path
@@ -170,8 +172,9 @@ corresponds to NAME, or NIL if there is none."
 ;;; associated with it.
 (/show0 "unix.lisp 391")
 (defun unix-close (fd)
-  (declare (type unix-fd fd))
-  (void-syscall ("close" int) fd))
+  #!+win32 (sb!win32:unixlike-close fd)
+  #!-win32 (declare (type unix-fd fd))
+  #!-win32 (void-syscall ("close" int) fd))
 \f
 ;;;; stdlib.h
 
@@ -315,7 +318,8 @@ corresponds to NAME, or NIL if there is none."
 (defun unix-read (fd buf len)
   (declare (type unix-fd fd)
            (type (unsigned-byte 32) len))
-  (int-syscall ("read" int (* char) int) fd buf len))
+  (int-syscall (#!-win32 "read" #!+win32 "win32_unix_read"
+                int (* char) int) fd buf len))
 
 ;;; UNIX-WRITE accepts a file descriptor, a buffer, an offset, and the
 ;;; length to write. It attempts to write len bytes to the device
@@ -326,7 +330,8 @@ corresponds to NAME, or NIL if there is none."
            (type (unsigned-byte 32) offset len))
   (flet ((%write (sap)
            (declare (system-area-pointer sap))
-           (int-syscall ("write" int (* char) int)
+           (int-syscall (#!-win32 "write" #!+win32 "win32_unix_write"
+                         int (* char) int)
                         fd
                         (with-alien ((ptr (* char) sap))
                           (addr (deref ptr offset)))
index 1f8e7ed..944d206 100644 (file)
@@ -21,6 +21,9 @@
 ;;; but groveling HANDLE makes it unsigned, which currently breaks the
 ;;; build. --NS 2006-06-18
 (define-alien-type handle int-ptr)
+
+(define-alien-type lispbool (boolean 32))
+
 (define-alien-type system-string
                    #!-sb-unicode c-string
                    #!+sb-unicode (c-string :external-format :ucs-2))
@@ -649,6 +652,12 @@ UNIX epoch: January 1st 1970."
           (alien-funcall afunc aname (addr length))))
       (cast-and-free aname))))
 
+(define-alien-routine ("SetFilePointerEx" set-file-pointer-ex) lispbool
+  (handle handle)
+  (offset long-long)
+  (new-position long-long :out)
+  (whence dword))
+
 ;; File mapping support routines
 (define-alien-routine (#!+sb-unicode "CreateFileMappingW"
                        #!-sb-unicode "CreateFileMappingA"
@@ -724,6 +733,7 @@ UNIX epoch: January 1st 1970."
 (defconstant file-attribute-encrypted #x4000)
 
 (defconstant file-flag-overlapped #x40000000)
+(defconstant file-flag-sequential-scan #x8000000)
 
 ;; GetFileAttribute is like a tiny subset of fstat(),
 ;; enough to distinguish directories from anything else.
@@ -735,3 +745,158 @@ UNIX epoch: January 1st 1970."
 
 (define-alien-routine ("CloseHandle" close-handle) bool
   (handle handle))
+
+(define-alien-routine ("_open_osfhandle" open-osfhandle)
+    int
+  (handle handle)
+  (flags int))
+
+;; Intended to be an imitation of sb!unix:unix-open based on
+;; CreateFile, as complete as possibly.
+;; FILE_FLAG_OVERLAPPED is a must for decent I/O.
+
+(defun unixlike-open (path flags mode &optional revertable)
+  (declare (type sb!unix:unix-pathname path)
+           (type fixnum flags)
+           (type sb!unix:unix-file-mode mode)
+           (ignorable mode))
+  (let* ((disposition-flags
+          (logior
+           (if (zerop (logand sb!unix:o_creat flags)) 0 #b100)
+           (if (zerop (logand sb!unix:o_excl flags)) 0 #b010)
+           (if (zerop (logand sb!unix:o_trunc flags)) 0 #b001)))
+         (create-disposition
+          ;; there are 8 combinations of creat|excl|trunc, some of
+          ;; them are equivalent. Case stmt below maps them to 5
+          ;; dispositions (see CreateFile manual).
+          (case disposition-flags
+            ((#b110 #b111) file-create-new)
+            ((#b001 #b011) file-truncate-existing)
+            ((#b000 #b010) file-open-existing)
+            (#b100 file-open-always)
+            (#b101 file-create-always))))
+    (let ((handle
+           (create-file path
+                        (logior
+                         (if revertable #x10000 0)
+                         (if (plusp (logand sb!unix:o_append flags))
+                             access-file-append-data
+                             0)
+                         (ecase (logand 3 flags)
+                           (0 FILE_GENERIC_READ)
+                           (1 FILE_GENERIC_WRITE)
+                           ((2 3) (logior FILE_GENERIC_READ
+                                          FILE_GENERIC_WRITE))))
+                        (logior FILE_SHARE_READ
+                                FILE_SHARE_WRITE)
+                        nil
+                        create-disposition
+                        (logior
+                         file-attribute-normal
+                         file-flag-overlapped
+                         file-flag-sequential-scan)
+                        0)))
+      (if (eql handle invalid-handle)
+          (values nil
+                  (let ((error-code (get-last-error)))
+                    (case error-code
+                      (#.error_file_not_found
+                       sb!unix:enoent)
+                      ((#.error_already_exists #.error_file_exists)
+                       sb!unix:eexist)
+                      (otherwise (- error-code)))))
+          (progn
+            ;; FIXME: seeking to the end is not enough for real APPEND
+            ;; semantics, but it's better than nothing.
+            ;;   -- AK
+            ;;
+            ;; On the other hand, the CL spec implies the "better than
+            ;; nothing" seek-once semantics implemented here, and our
+            ;; POSIX backend is incorrect in implementing :APPEND as
+            ;; O_APPEND.  Other CL implementations get this right across
+            ;; platforms.
+            ;;
+            ;; Of course, it would be nice if we had :IF-EXISTS
+            ;; :ATOMICALLY-APPEND separately as an extension, and in
+            ;; that case, we will have to worry about supporting it
+            ;; here after all.
+            ;;
+            ;; I've tested this only very briefly (on XP and Windows 7),
+            ;; but my impression is that WriteFile (without documenting
+            ;; it?) is like ZwWriteFile, i.e. if we pass in -1 as the
+            ;; offset in our overlapped structure, WriteFile seeks to the
+            ;; end for us.  Should we depend on that?  How do we communicate
+            ;; our desire to do so to the runtime?
+            ;;   -- DFL
+            ;;
+            (set-file-pointer-ex handle 0 (if (plusp (logand sb!unix::o_append flags)) 2 0))
+            (let ((fd (open-osfhandle handle (logior sb!unix::o_binary flags))))
+              (if (minusp fd)
+                  (values nil (sb!unix::get-errno))
+                  (values fd 0))))))))
+
+(define-alien-routine ("closesocket" close-socket) int (handle handle))
+(define-alien-routine ("shutdown" shutdown-socket) int (handle handle)
+  (how int))
+
+(define-alien-routine ("DuplicateHandle" duplicate-handle) lispbool
+  (from-process handle)
+  (from-handle handle)
+  (to-process handle)
+  (to-handle handle :out)
+  (access dword)
+  (inheritp lispbool)
+  (options dword))
+
+(defconstant +handle-flag-inherit+ 1)
+(defconstant +handle-flag-protect-from-close+ 2)
+
+(define-alien-routine ("SetHandleInformation" set-handle-information) lispbool
+  (handle handle)
+  (mask dword)
+  (flags dword))
+
+(define-alien-routine ("GetHandleInformation" get-handle-information) lispbool
+  (handle handle)
+  (flags dword :out))
+
+(define-alien-routine getsockopt int
+  (handle handle)
+  (level int)
+  (opname int)
+  (dataword int-ptr :in-out)
+  (socklen int :in-out))
+
+(defconstant sol_socket #xFFFF)
+(defconstant so_type #x1008)
+
+(defun socket-handle-p (handle)
+  (zerop (getsockopt handle sol_socket so_type 0 (alien-size int :bytes))))
+
+(defconstant ebadf 9)
+
+;;; For sockets, CloseHandle first and closesocket() afterwards is
+;;; legal: winsock tracks its handles separately (that's why we have
+;;; the problem with simple _close in the first place).
+;;;
+;;; ...Seems to be the problem on some OSes, though. We could
+;;; duplicate a handle and attempt close-socket on a duplicated one,
+;;; but it also have some problems...
+;;;
+;;; For now, we protect socket handle from close with SetHandleInformation,
+;;; then call CRT _close() that fails to close a handle but _gets rid of fd_,
+;;; and then we close a handle ourserves.
+
+(defun unixlike-close (fd)
+  (let ((handle (get-osfhandle fd)))
+    (flet ((close-protection (enable)
+             (set-handle-information handle 2 (if enable 2 0))))
+      (if (= handle invalid-handle)
+          (values nil ebadf)
+          (progn
+            (when (and (socket-handle-p handle) (close-protection t))
+              (shutdown-socket handle 2)
+              (alien-funcall (extern-alien "_dup2" (function int int int)) 0 fd)
+              (close-protection nil)
+              (close-socket handle))
+            (sb!unix::void-syscall ("close" int) fd))))))
index a047e63..c81cac2 100644 (file)
@@ -29,7 +29,7 @@ OS_SRC = win32-os.c x86-win32-os.c os-common.c pthreads_win32.c
 # interface, though.:-| As far as I (WHN 2002-05-19) know, no one is
 # working on one and it would be a nice thing to have.)
 OS_LINK_FLAGS = -Wl,--export-dynamic
-OS_LIBS =
+OS_LIBS = -l ws2_32
 ifdef LISP_FEATURE_SB_CORE_COMPRESSION
   OS_LIBS += -lz
 endif
index 67a7c20..fc3f235 100644 (file)
@@ -69,6 +69,7 @@ dyndebug_init()
     dyndebug_init1(seh,            "SEH");
     dyndebug_init1(misc,           "MISC");
     dyndebug_init1(pagefaults,     "PAGEFAULTS");
+    dyndebug_init1(io,             "IO");
 
     int n_output_flags = n;
     dyndebug_init1(backtrace_when_lost, "BACKTRACE_WHEN_LOST");
index c467261..e3ff2d7 100644 (file)
@@ -130,6 +130,7 @@ extern struct dyndebug_config {
     int dyndebug_pagefaults;
     int dyndebug_backtrace_when_lost;
     int dyndebug_sleep_when_lost;
+    int dyndebug_io;
 } dyndebug_config;
 
 #ifdef LISP_FEATURE_GENCGC
index 224643d..08b56d7 100644 (file)
@@ -847,6 +847,7 @@ wake_thread_win32(struct thread *thread)
         (SymbolTlValue(STOP_FOR_GC_PENDING,thread)==T))
         return;
 
+    wake_thread_io(thread);
     pthread_mutex_unlock(&all_threads_lock);
 
     if (maybe_become_stw_initiator(1) && !in_race_p()) {
index 59e5473..f27f5ed 100644 (file)
@@ -523,6 +523,10 @@ os_invalidate_free_by_any_address(os_vm_address_t addr, os_vm_size_t len)
     AVERLAX(VirtualFree(minfo.AllocationBase, 0, MEM_RELEASE));
 }
 
+#define maybe_open_osfhandle _open_osfhandle
+#define maybe_get_osfhandle _get_osfhandle
+#define FDTYPE int
+
 /*
  * os_map() is called to map a chunk of the core file into memory.
  *
@@ -1080,13 +1084,191 @@ char *dirname(char *path)
     return buf;
 }
 
+/* Unofficial but widely used property of console handles: they have
+   #b11 in two minor bits, opposed to other handles, that are
+   machine-word-aligned. Properly emulated even on wine.
+
+   Console handles are special in many aspects, e.g. they aren't NTDLL
+   system handles: kernel32 redirects console operations to CSRSS
+   requests. Using the hack below to distinguish console handles is
+   justified, as it's the only method that won't hang during
+   outstanding reads, won't try to lock NT kernel object (if there is
+   one; console isn't), etc. */
+int
+console_handle_p(HANDLE handle)
+{
+    return (handle != NULL)&&
+        (handle != INVALID_HANDLE_VALUE)&&
+        ((((int)(intptr_t)handle)&3)==3);
+}
+
+static const LARGE_INTEGER zero_large_offset = {.QuadPart = 0LL};
+
+int
+win32_unix_write(FDTYPE fd, void * buf, int count)
+{
+    HANDLE handle;
+    DWORD written_bytes;
+    OVERLAPPED overlapped;
+    struct thread * self = arch_os_get_current_thread();
+    BOOL waitInGOR;
+    LARGE_INTEGER file_position;
+    BOOL seekable;
+    BOOL ok;
+
+    handle =(HANDLE)maybe_get_osfhandle(fd);
+    if (console_handle_p(handle))
+        return write(fd, buf, count);
+
+    overlapped.hEvent = self->private_events.events[0];
+    seekable = SetFilePointerEx(handle,
+                                zero_large_offset,
+                                &file_position,
+                                FILE_CURRENT);
+    if (seekable) {
+        overlapped.Offset = file_position.LowPart;
+        overlapped.OffsetHigh = file_position.HighPart;
+    } else {
+        overlapped.Offset = 0;
+        overlapped.OffsetHigh = 0;
+    }
+    ok = WriteFile(handle, buf, count, &written_bytes, &overlapped);
+
+    if (ok) {
+        goto done_something;
+    } else {
+        if (GetLastError()!=ERROR_IO_PENDING) {
+            errno = EIO;
+            return -1;
+        } else {
+            if(WaitForMultipleObjects(2,self->private_events.events,
+                                      FALSE,INFINITE) != WAIT_OBJECT_0) {
+                CancelIo(handle);
+                waitInGOR = TRUE;
+            } else {
+                waitInGOR = FALSE;
+            }
+            if (!GetOverlappedResult(handle,&overlapped,&written_bytes,waitInGOR)) {
+                if (GetLastError()==ERROR_OPERATION_ABORTED) {
+                    errno = EINTR;
+                } else {
+                    errno = EIO;
+                }
+                return -1;
+            } else {
+                goto done_something;
+            }
+        }
+    }
+  done_something:
+    if (seekable) {
+        file_position.QuadPart += written_bytes;
+        SetFilePointerEx(handle,file_position,NULL,FILE_BEGIN);
+    }
+    return written_bytes;
+}
+
+int
+win32_unix_read(FDTYPE fd, void * buf, int count)
+{
+    HANDLE handle;
+    OVERLAPPED overlapped = {.Internal=0};
+    DWORD read_bytes = 0;
+    struct thread * self = arch_os_get_current_thread();
+    DWORD errorCode = 0;
+    BOOL waitInGOR = FALSE;
+    BOOL ok = FALSE;
+    LARGE_INTEGER file_position;
+    BOOL seekable;
+
+    handle = (HANDLE)maybe_get_osfhandle(fd);
+
+    if (console_handle_p(handle)) {
+        /* 1. Console is a singleton.
+           2. The only way to cancel console handle I/O is to close it.
+        */
+    if (console_handle_p(handle))
+        return read(fd, buf, count);
+    }
+    overlapped.hEvent = self->private_events.events[0];
+    /* If it has a position, we won't try overlapped */
+    seekable = SetFilePointerEx(handle,
+                                zero_large_offset,
+                                &file_position,
+                                FILE_CURRENT);
+    if (seekable) {
+        overlapped.Offset = file_position.LowPart;
+        overlapped.OffsetHigh = file_position.HighPart;
+    } else {
+        overlapped.Offset = 0;
+        overlapped.OffsetHigh = 0;
+    }
+    ok = ReadFile(handle,buf,count,&read_bytes, &overlapped);
+    if (ok) {
+        /* immediately */
+        goto done_something;
+    } else {
+        errorCode = GetLastError();
+        if (errorCode == ERROR_HANDLE_EOF ||
+            errorCode == ERROR_BROKEN_PIPE ||
+            errorCode == ERROR_NETNAME_DELETED) {
+            read_bytes = 0;
+            goto done_something;
+        }
+        if (errorCode!=ERROR_IO_PENDING) {
+            /* is it some _real_ error? */
+            errno = EIO;
+            return -1;
+        } else {
+            int ret;
+            if( (ret = WaitForMultipleObjects(2,self->private_events.events,
+                                              FALSE,INFINITE)) != WAIT_OBJECT_0) {
+                CancelIo(handle);
+                waitInGOR = TRUE;
+                /* Waiting for IO only */
+            } else {
+                waitInGOR = FALSE;
+            }
+            ok = GetOverlappedResult(handle,&overlapped,&read_bytes,waitInGOR);
+            if (!ok) {
+                errorCode = GetLastError();
+                if (errorCode == ERROR_HANDLE_EOF ||
+                    errorCode == ERROR_BROKEN_PIPE ||
+                    errorCode == ERROR_NETNAME_DELETED) {
+                    read_bytes = 0;
+                    goto done_something;
+                } else {
+                    if (errorCode == ERROR_OPERATION_ABORTED)
+                        errno = EINTR;      /* that's it. */
+                    else
+                        errno = EIO;        /* something unspecific */
+                    return -1;
+                }
+            } else
+                goto done_something;
+        }
+    }
+  done_something:
+    if (seekable) {
+        file_position.QuadPart += read_bytes;
+        SetFilePointerEx(handle,file_position,NULL,FILE_BEGIN);
+    }
+    return read_bytes;
+}
+
 /* This is a manually-maintained version of ldso_stubs.S. */
 
 void __stdcall RtlUnwind(void *, void *, void *, void *); /* I don't have winternl.h */
 
 void scratch(void)
 {
+    LARGE_INTEGER la = {{0}};
+    closesocket(0);
     CloseHandle(0);
+    shutdown(0, 0);
+    SetHandleInformation(0, 0, 0);
+    GetHandleInformation(0, 0);
+    getsockopt(0, 0, 0, 0, 0);
     FlushConsoleInputBuffer(0);
     FormatMessageA(0, 0, 0, 0, 0, 0, 0);
     FreeLibrary(0);
@@ -1108,6 +1290,7 @@ void scratch(void)
     Sleep(0);
     WriteFile(0, 0, 0, 0, 0);
     _get_osfhandle(0);
+    _open_osfhandle(0, 0);
     _rmdir(0);
     _pipe(0,0,0);
     access(0,0);
@@ -1120,6 +1303,7 @@ void scratch(void)
     MapViewOfFile(0,0,0,0,0);
     UnmapViewOfFile(0);
     FlushViewOfFile(0,0);
+    SetFilePointerEx(0, la, 0, 0);
     #ifndef LISP_FEATURE_SB_UNICODE
       CreateDirectoryA(0,0);
       CreateFileMappingA(0,0,0,0,0,0);
index 6e93419..125f884 100644 (file)
@@ -191,7 +191,13 @@ main(int argc, char *argv[])
 
     printf(";;; Errors\n");
 
-    defconstant ("ERROR_ENVVAR_NOT_FOUND", ERROR_ENVVAR_NOT_FOUND);
+    printf(";;; Errors\n");
+
+    defconstant("ERROR_ENVVAR_NOT_FOUND", ERROR_ENVVAR_NOT_FOUND);
+    defconstant("ERROR_ALREADY_EXISTS", ERROR_ALREADY_EXISTS);
+    defconstant("ERROR_FILE_EXISTS", ERROR_FILE_EXISTS);
+    defconstant("ERROR_FILE_NOT_FOUND", ERROR_FILE_NOT_FOUND);
+    defconstant("ERROR_ACCESS_DENIED", ERROR_ACCESS_DENIED);
 
     printf(";;; GetComputerName\n");